diff --git a/compilerutils/include/compilerutils/CompilerUtils.h b/compilerutils/include/compilerutils/CompilerUtils.h index 3669ebeb2b..506dcad925 100644 --- a/compilerutils/include/compilerutils/CompilerUtils.h +++ b/compilerutils/include/compilerutils/CompilerUtils.h @@ -70,6 +70,9 @@ llvm::Function *cloneFunctionHeader(llvm::Function &f, llvm::FunctionType *newTy // Add an unreachable at the current position and remove the rest of the basic block. void createUnreachable(llvm::IRBuilder<> &b); +// Specifies a memory that is loaded is the last use. +void setIsLastUseLoad(llvm::LoadInst &Load); + struct CrossModuleInlinerResult { llvm::Value *returnValue; llvm::iterator_range newBBs; diff --git a/compilerutils/lib/CompilerUtils.cpp b/compilerutils/lib/CompilerUtils.cpp index 40cc96bd83..8bf7ca653d 100644 --- a/compilerutils/lib/CompilerUtils.cpp +++ b/compilerutils/lib/CompilerUtils.cpp @@ -41,6 +41,10 @@ using namespace llvm; +// Whether this is a load instruction that should translate to a last_use +// load. +static constexpr const char *MDIsLastUseName = "amdgpu.last.use"; + // ===================================================================================================================== // Create an LLVM function call to the named function. The callee is built // automatically based on return type and its parameters. @@ -150,6 +154,10 @@ void CompilerUtils::createUnreachable(llvm::IRBuilder<> &b) { DeleteDeadBlock(oldCode); } +void CompilerUtils::setIsLastUseLoad(llvm::LoadInst &Load) { + Load.setMetadata(MDIsLastUseName, MDTuple::get(Load.getContext(), {})); +} + namespace { // Get the name of a global that is copied to a different module for inlining. 
diff --git a/gfxruntime/src/shaders/AdvancedBlend.hlsl b/gfxruntime/src/shaders/AdvancedBlend.hlsl index 872f01b360..5bff6c24cc 100644 --- a/gfxruntime/src/shaders/AdvancedBlend.hlsl +++ b/gfxruntime/src/shaders/AdvancedBlend.hlsl @@ -49,8 +49,8 @@ float4 AmdExtFragCoord() DUMMY_FLOAT4_FUNC int AmdExtSampleId() DUMMY_INT_FUNC -float4 AmdAdvancedBlendTexelLoad(int4 imageLow, int4 imageHigh, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC -float4 AmdAdvancedBlendTexelLoadFmask(int4 imageMsLow, int4 imageMsHigh, int4 fmaskLow, int4 fmaskHigh, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC +float4 AmdAdvancedBlendTexelLoad(int64_t imageDesc, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC +float4 AmdAdvancedBlendTexelLoadFmask(int64_t imageDesc, int64_t fmaskDesc, int2 iCoord, int lod) DUMMY_FLOAT4_FUNC float4 AmdAdvancedBlendCoherentTexelLoad(float4 color, int2 iCoord, int sampleId) DUMMY_FLOAT4_FUNC void AmdAdvancedBlendCoherentTexelStore(float4 color, int2 iCoord, int sampleId) DUMMY_VOID_FUNC @@ -224,8 +224,8 @@ float AmdAdvancedBlendDivide(float dividend, float divisor) { } } -export float4 AmdAdvancedBlendInternal(float4 inColor, int4 imageMsLow, int4 imageMsHigh, int4 imageLow, int4 imageHigh, - int4 fmaskLow, int4 fmaskHigh, int mode, bool isMsaa) { +export float4 AmdAdvancedBlendInternal(float4 inColor, int64_t imageDescMs, int64_t imageDesc, int64_t fmaskDesc, + int mode, bool isMsaa) { float4 srcColor = inColor; if (mode == 0) { return srcColor; @@ -234,9 +234,9 @@ export float4 AmdAdvancedBlendInternal(float4 inColor, int4 imageMsLow, int4 ima int2 iCoord = int2(fragCoord.x, fragCoord.y); float4 dstColor; if (isMsaa) { - dstColor = AmdAdvancedBlendTexelLoadFmask(imageMsLow, imageMsHigh, fmaskLow, fmaskHigh, iCoord, 0); + dstColor = AmdAdvancedBlendTexelLoadFmask(imageDescMs, fmaskDesc, iCoord, 0); } else { - dstColor = AmdAdvancedBlendTexelLoad(imageLow, imageHigh, iCoord, 0); + dstColor = AmdAdvancedBlendTexelLoad(imageDesc, iCoord, 0); } // TODO: Uncomment them once ROV is 
support in LLPC // int sampleId = AmdExtSampleId(); diff --git a/imported/llvm-dialects b/imported/llvm-dialects index 55e176fb88..ed4b46e842 160000 --- a/imported/llvm-dialects +++ b/imported/llvm-dialects @@ -1 +1 @@ -Subproject commit 55e176fb88bcfc4fae45bafaa3ff209ec4c0d4ee +Subproject commit ed4b46e8425066a96a5e79afc29bce3d82eecf71 diff --git a/include/vkgcDefs.h b/include/vkgcDefs.h index 60b5621565..aed46de1b2 100644 --- a/include/vkgcDefs.h +++ b/include/vkgcDefs.h @@ -472,6 +472,7 @@ struct PipelineOptions { bool internalRtShaders; ///< Whether this pipeline has internal raytracing shaders unsigned forceNonUniformResourceIndexStageMask; ///< Mask of the stage to force using non-uniform resource index. bool reserved16; +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 73 bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate bool disableSampleMask; ///< For OGL only, disabled if framebuffer doesn't attach multisample texture bool buildResourcesDataForShaderModule; ///< For OGL only, build resources usage data while building shader module @@ -482,6 +483,25 @@ struct PipelineOptions { bool enableFragColor; ///< For OGL only, need to do frag color broadcast if it is enabled. 
bool disableBaseVertex; ///< For OGL only, force the BaseVertex builtin to 0 instead of /// loading it from userdata + bool bindlessTextureMode; ///< For OGL only, true if bindless textures are used + bool bindlessImageMode; ///< For OGL only, true if bindless images are used + const auto &getGlState() const { return *this; } +#else + struct GLState { + bool replaceSetWithResourceType; ///< For OGL only, replace 'set' with resource type during spirv translate + bool disableSampleMask; ///< For OGL only, disabled if framebuffer doesn't attach multisample texture + bool buildResourcesDataForShaderModule; ///< For OGL only, build resources usage data while building shader module + bool disableTruncCoordForGather; ///< If set, trunc_coord of sampler srd is disabled for gather4 + bool enableCombinedTexture; ///< For OGL only, use the 'set' for DescriptorCombinedTexture + ///< for sampled images and samplers + bool vertex64BitsAttribSingleLoc; ///< For OGL only, dvec3/dvec4 vertex attrib only consumes 1 location. + bool enableFragColor; ///< For OGL only, need to do frag color broadcast if it is enabled. + bool disableBaseVertex; ///< For OGL only, force the BaseVertex builtin to 0 instead of + bool bindlessTextureMode; ///< For OGL only, true if bindless textures are used + bool bindlessImageMode; ///< For OGL only, true if bindless images are used + } glState; + const auto &getGlState() const { return glState; } +#endif unsigned reserved20; bool enablePrimGeneratedQuery; ///< If set, primitive generated query is enabled bool disablePerCompFetch; ///< Disable per component fetch in uber fetch shader. @@ -512,6 +532,7 @@ struct ResourceNodeData { unsigned isTexelFetchUsed; ///< TRUE if texelFetch is used unsigned isDefaultUniformSampler; ///< TRUE if it's sampler image in default uniform struct unsigned columnCount; ///< Column count if this is a matrix variable. + unsigned componentCount; ///< Component count if this is a vector, row count if it is a matrix. 
BasicType basicType; ///< Type of the variable or element }; @@ -545,6 +566,43 @@ struct ResourcesNodes { unsigned defaultUniformInfoCount; }; +// raytracing system value usage flags +union RayTracingSystemValueUsage { + struct { + union { + struct { + uint16_t flags : 1; // Shader calls gl_IncomingRayFlagsEXT + uint16_t worldRayOrigin : 1; // Shader calls gl_WorldRayOriginEXT + uint16_t tMin : 1; // Shader calls gl_RayTminEXT + uint16_t worldRayDirection : 1; // Shader calls gl_WorldRayDirectionEXT + uint16_t tCurrent : 1; // Shader calls gl_HitTEXT + uint16_t launchId : 1; // Shader calls gl_LaunchIDEXT + uint16_t launchSize : 1; // Shader calls gl_LaunchSizeEXT + uint16_t reserved : 9; // Reserved + }; + uint16_t u16All; + } ray; + + union { + struct { + uint16_t hitKind : 1; // Shader calls gl_HitKindEXT + uint16_t instanceIndex : 1; // Shader calls gl_InstanceCustomIndexEXT + uint16_t instanceID : 1; // Shader calls gl_InstanceID + uint16_t primitiveIndex : 1; // Shader calls gl_PrimitiveID + uint16_t geometryIndex : 1; // Shader calls gl_GeometryIndexEXT + uint16_t objectToWorld : 1; // Shader calls gl_ObjectToWorldEXT + uint16_t objectRayOrigin : 1; // Shader calls gl_ObjectRayOriginEXT + uint16_t objectRayDirection : 1; // Shader calls gl_ObjectRayDirectionEXT + uint16_t worldToObject : 1; // Shader calls gl_WorldToObjectEXT + uint16_t hitTrianglePosition : 1; // Shader calls gl_HitTriangleVertexPositionsEXT + uint16_t reserved : 6; // Reserved + }; + uint16_t u16All; + } primitive; + }; + uint32_t u32All; +}; + /// Represents usage info of a shader module struct ShaderModuleUsage { bool enableVarPtrStorageBuf; ///< Whether to enable "VariablePointerStorageBuffer" capability @@ -573,12 +631,14 @@ struct ShaderModuleUsage { bool pixelCenterInteger; ///< Whether pixel coord is Integer bool useGenericBuiltIn; ///< Whether to use builtIn inputs that include gl_PointCoord, gl_PrimitiveId, /// gl_Layer, gl_ClipDistance or gl_CullDistance. 
+ bool useBarycentric; ///< Whether to use gl_BarycentricXX or pervertexEXT decoration bool enableXfb; ///< Whether transform feedback is enabled unsigned localSizeX; ///< Compute shader work-group size in the X dimension unsigned localSizeY; ///< Compute shader work-group size in the Y dimension unsigned localSizeZ; ///< Compute shader work-group size in the Z dimension bool disableDualSource; ///< Whether disable dualSource blend uint32_t clipDistanceArraySize; ///< Count of output clip distance + RayTracingSystemValueUsage rtSystemValueUsage; ///< Usage flags for ray tracing builtins }; /// Represents common part of shader module data @@ -1001,43 +1061,6 @@ enum RayTracingRayFlag : unsigned { }; // ===================================================================================================================== -// raytracing system value usage flags -union RayTracingSystemValueUsage { - struct { - union { - struct { - uint16_t flags : 1; // Shader calls gl_IncomingRayFlagsEXT - uint16_t worldRayOrigin : 1; // Shader calls gl_WorldRayOriginEXT - uint16_t tMin : 1; // Shader calls gl_RayTminEXT - uint16_t worldRayDirection : 1; // Shader calls gl_WorldRayDirectionEXT - uint16_t tCurrent : 1; // Shader calls gl_HitTEXT - uint16_t launchId : 1; // Shader calls gl_LaunchIDEXT - uint16_t launchSize : 1; // Shader calls gl_LaunchSizeEXT - uint16_t reserved : 9; // Reserved - }; - uint16_t u16All; - } ray; - - union { - struct { - uint16_t hitKind : 1; // Shader calls gl_HitKindEXT - uint16_t instanceIndex : 1; // Shader calls gl_InstanceCustomIndexEXT - uint16_t instanceID : 1; // Shader calls gl_InstanceID - uint16_t primitiveIndex : 1; // Shader calls gl_PrimitiveID - uint16_t geometryIndex : 1; // Shader calls gl_GeometryIndexEXT - uint16_t objectToWorld : 1; // Shader calls gl_ObjectToWorldEXT - uint16_t objectRayOrigin : 1; // Shader calls gl_ObjectRayOriginEXT - uint16_t objectRayDirection : 1; // Shader calls gl_ObjectRayDirectionEXT - uint16_t worldToObject 
: 1; // Shader calls gl_WorldToObjectEXT - uint16_t hitTrianglePosition : 1; // Shader calls gl_HitTriangleVertexPositionsEXT - uint16_t reserved : 6; // Reserved - }; - uint16_t u16All; - } primitive; - }; - uint32_t u32All; -}; - /// Represents ray-tracing shader export configuration struct RayTracingShaderExportConfig { unsigned indirectCallingConvention; ///< Indirect calling convention @@ -1299,6 +1322,7 @@ struct GraphicsPipelineBuildInfo { float pixelTransferBias[4]; ///< Bias apply to render color target bool enableColorClampVs; ///< Enable clamp vertex output color bool enableColorClampFs; ///< Enable clamp fragment output color + bool enableFlatShade; ///< Whether enable flat shade. } glState; const auto &getGlState() const { return glState; } #endif @@ -1597,6 +1621,7 @@ class IUtil { /// /// @param [in] spvBin SPIR-V binary static const char *VKAPI_CALL GetEntryPointNameFromSpirvBinary(const BinaryData *spvBin); + static const char *VKAPI_CALL GetResourceMappingNodeTypeName(ResourceMappingNodeType type); }; /// 128-bit hash compatible structure diff --git a/lgc/CMakeLists.txt b/lgc/CMakeLists.txt index e7feab6d76..e49c3a34fc 100644 --- a/lgc/CMakeLists.txt +++ b/lgc/CMakeLists.txt @@ -191,6 +191,7 @@ target_sources(LLVMlgc PRIVATE state/ShaderModes.cpp state/ShaderStage.cpp state/TargetInfo.cpp + state/RuntimeContext.cpp ) # lgc/util diff --git a/lgc/builder/ArithBuilder.cpp b/lgc/builder/ArithBuilder.cpp index 57ba3e3f8c..fee6b1a564 100644 --- a/lgc/builder/ArithBuilder.cpp +++ b/lgc/builder/ArithBuilder.cpp @@ -507,29 +507,20 @@ Value *BuilderImpl::CreateCosh(Value *x, const Twine &instName) { // @param x : Input value X // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateTanh(Value *x, const Twine &instName) { - // sinh(x) / cosh(x) - // (e^x - e^(-x))/(e^x + e^(-x)) + // tanh(x) = copysign(1-2/(e^-|2x|+1),x) // 1/log(2) = 1.442695 - // e^x = 2^(x*(1/log(2))) = 2^(x*1.442695)) - Value *divLog2 = CreateFMul(x, 
getRecipLog2(x->getType())); - Value *negDivLog2 = CreateFSub(ConstantFP::get(x->getType(), 0.0), divLog2); - Value *exp = CreateUnaryIntrinsic(Intrinsic::exp2, divLog2); - Value *expNeg = CreateUnaryIntrinsic(Intrinsic::exp2, negDivLog2); - Value *doubleSinh = CreateFSub(exp, expNeg); - Value *doubleCosh = CreateFAdd(exp, expNeg); - Value *result = fDivFast(doubleSinh, doubleCosh); - - if (!getFastMathFlags().noInfs()) { - // NOTE: If the fast math flags might have INFs, we should check the special case when the input is +INF or -INF. - // According to the limit of tanh(x), we have following definitions: - // / 1.0, when x -> +INF - // lim(tanh(x)) = - // \ -1.0, when x -> -INF - Value *one = ConstantFP::get(x->getType(), 1.0); - Value *isInf = CreateIsInf(x); - result = CreateSelect(isInf, CreateCopySign(one, x), result); - } - + // e = 2^(1/log(2)) + // e^-|2x| = 2^(-|2x|*(1/log(2))) + auto vTy = x->getType(); + Value *result = CreateIntrinsic(Intrinsic::fabs, vTy, x); + result = CreateFNeg(result); + result = CreateFMul(ConstantFP::get(vTy, 2.0), result); + result = CreateFMul(getRecipLog2(vTy), result); + result = CreateUnaryIntrinsic(Intrinsic::exp2, result); + result = CreateFAdd(ConstantFP::get(vTy, 1.0), result); + result = fDivFast(ConstantFP::get(vTy, 2.0), result); + result = CreateFSub(ConstantFP::get(vTy, 1.0), result); + result = CreateCopySign(result, x); result->setName(instName); return result; } diff --git a/lgc/builder/BuilderImpl.cpp b/lgc/builder/BuilderImpl.cpp index ad69faf624..0177fcd2be 100644 --- a/lgc/builder/BuilderImpl.cpp +++ b/lgc/builder/BuilderImpl.cpp @@ -249,11 +249,12 @@ Value *BuilderImpl::CreateIntegerDotProduct(Value *vector1, Value *vector2, Valu // ===================================================================================================================== // Get whether the context we are building in supports ds_bpermute or v_bpermute across all lanes in the wave -bool BuilderImpl::supportWaveWideBPermute() const { 
+// +// @param shaderStage : shader stage enum. +bool BuilderImpl::supportWaveWideBPermute(ShaderStageEnum shaderStage) const { auto gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion().major; auto supportBPermute = gfxIp == 8 || gfxIp == 9; - auto shaderStage = getShaderStage(GetInsertBlock()->getParent()); - auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage.value()); + auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage); supportBPermute = supportBPermute || waveSize == 32; return supportBPermute; } @@ -261,10 +262,7 @@ bool BuilderImpl::supportWaveWideBPermute() const { // ===================================================================================================================== // Get whether the context we are building in supports permute lane 64 DPP operations. bool BuilderImpl::supportPermLane64Dpp() const { - auto gfxip = getPipelineState()->getTargetInfo().getGfxIpVersion().major; - auto shaderStage = getShaderStage(GetInsertBlock()->getParent()); - auto waveSize = getPipelineState()->getShaderWaveSize(shaderStage.value()); - return gfxip >= 11 && waveSize == 64; + return getPipelineState()->getTargetInfo().getGfxIpVersion().major >= 11; } // ===================================================================================================================== diff --git a/lgc/builder/DescBuilder.cpp b/lgc/builder/DescBuilder.cpp index c837b2129a..6ef7fbbf69 100644 --- a/lgc/builder/DescBuilder.cpp +++ b/lgc/builder/DescBuilder.cpp @@ -394,45 +394,47 @@ Value *BuilderImpl::buildBufferCompactDesc(Value *desc, unsigned stride) { Value *descElem1 = CreateExtractElement(desc, 1); // Build normal buffer descriptor - // Dword 0 Value *bufDesc = PoisonValue::get(FixedVectorType::get(getInt32Ty(), 4)); - bufDesc = CreateInsertElement(bufDesc, descElem0, uint64_t(0)); - - // Dword 1 - SqBufRsrcWord1 sqBufRsrcWord1 = {}; - sqBufRsrcWord1.bits.baseAddressHi = UINT16_MAX; - descElem1 = CreateAnd(descElem1, 
getInt32(sqBufRsrcWord1.u32All)); - if (stride) { - SqBufRsrcWord1 sqBufRsrcWord1Stride = {}; - sqBufRsrcWord1Stride.bits.stride = stride; - descElem1 = CreateOr(descElem1, getInt32(sqBufRsrcWord1Stride.u32All)); - } - bufDesc = CreateInsertElement(bufDesc, descElem1, 1); - - // Dword 2 - SqBufRsrcWord2 sqBufRsrcWord2 = {}; - sqBufRsrcWord2.bits.numRecords = UINT32_MAX; - bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord2.u32All), 2); - - // Dword 3 - SqBufRsrcWord3 sqBufRsrcWord3 = {}; - sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X; - sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; - sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; - sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; - if (gfxIp.major == 10) { - sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; - sqBufRsrcWord3.gfx10.resourceLevel = 1; - sqBufRsrcWord3.gfx10.oobSelect = stride ? 3 : 2; - assert(sqBufRsrcWord3.u32All == 0x21014FAC || sqBufRsrcWord3.u32All == 0x31014FAC); - } else if (gfxIp.major >= 11) { - sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; - sqBufRsrcWord3.gfx11.oobSelect = stride ? 
3 : 2; - assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC); - } else { - llvm_unreachable("Not implemented!"); + { + // Dword 0 + bufDesc = CreateInsertElement(bufDesc, descElem0, uint64_t(0)); + + // Dword 1 + SqBufRsrcWord1 sqBufRsrcWord1 = {}; + sqBufRsrcWord1.bits.baseAddressHi = UINT16_MAX; + descElem1 = CreateAnd(descElem1, getInt32(sqBufRsrcWord1.u32All)); + if (stride) { + SqBufRsrcWord1 sqBufRsrcWord1Stride = {}; + sqBufRsrcWord1Stride.bits.stride = stride; + descElem1 = CreateOr(descElem1, getInt32(sqBufRsrcWord1Stride.u32All)); + } + bufDesc = CreateInsertElement(bufDesc, descElem1, 1); + + // Dword 2 + SqBufRsrcWord2 sqBufRsrcWord2 = {}; + sqBufRsrcWord2.bits.numRecords = UINT32_MAX; + bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord2.u32All), 2); + + // Dword 3 + SqBufRsrcWord3 sqBufRsrcWord3 = {}; + sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X; + sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; + sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; + sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; + if (gfxIp.major == 10) { + sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx10.resourceLevel = 1; + sqBufRsrcWord3.gfx10.oobSelect = stride ? 3 : 2; + assert(sqBufRsrcWord3.u32All == 0x21014FAC || sqBufRsrcWord3.u32All == 0x31014FAC); + } else if (gfxIp.major >= 11) { + sqBufRsrcWord3.gfx11.format = BUF_FORMAT_32_UINT; + sqBufRsrcWord3.gfx11.oobSelect = stride ? 
3 : 2; + assert(sqBufRsrcWord3.u32All == 0x20014FAC || sqBufRsrcWord3.u32All == 0x30014FAC); + } else { + llvm_unreachable("Not implemented!"); + } + bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3); } - bufDesc = CreateInsertElement(bufDesc, getInt32(sqBufRsrcWord3.u32All), 3); return bufDesc; } diff --git a/lgc/builder/ImageBuilder.cpp b/lgc/builder/ImageBuilder.cpp index 9b2fdb6d0e..363ebefd7c 100644 --- a/lgc/builder/ImageBuilder.cpp +++ b/lgc/builder/ImageBuilder.cpp @@ -423,14 +423,9 @@ static Type *convertToFloatingPointType(Type *origTy) { // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags, Value *imageDesc, Value *coord, Value *mipLevel, const Twine &instName) { - imageDesc = fixImageDescForRead(imageDesc); - // Mark usage of images, to allow the compute workgroup reconfiguration optimization. - getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->useImages = true; - getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->resourceRead = true; - assert(coord->getType()->getScalarType()->isIntegerTy(32)); - coord = handleFragCoordViewIndex(coord, flags, dim); + if (isa(imageDesc)) + return PoisonValue::get(resultTy); - unsigned dmask = 1; Type *origTexelTy = resultTy; if (auto structResultTy = dyn_cast(resultTy)) origTexelTy = structResultTy->getElementType(0); @@ -444,6 +439,21 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags texelTy = FixedVectorType::get(getHalfTy(), 4); } + bool isTexelBuffer = (dim == Dim1DBuffer || dim == Dim1DArrayBuffer); + bool needFullDesc = texelTy != origTexelTy && origTexelTy->isIntOrIntVectorTy(64) && origTexelTy->isVectorTy() && + m_pipelineState->getOptions().allowNullDescriptor; + imageDesc = transformImageDesc(imageDesc, needFullDesc, isTexelBuffer, resultTy); + const bool isVecTyDesc = imageDesc->getType()->isVectorTy(); + if (isVecTyDesc) + imageDesc = 
fixImageDescForRead(imageDesc); + // Mark usage of images, to allow the compute workgroup reconfiguration optimization. + getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->useImages = true; + getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->resourceRead = true; + assert(coord->getType()->getScalarType()->isIntegerTy(32)); + coord = handleFragCoordViewIndex(coord, flags, dim); + + unsigned dmask = 1; + if (auto vectorResultTy = dyn_cast(texelTy)) dmask = (1U << vectorResultTy->getNumElements()) - 1; @@ -462,7 +472,7 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags SmallVector args; Instruction *imageInst = nullptr; unsigned imageDescArgIndex = 0; - if (imageDesc->getType() == getDescTy(ResourceNodeType::DescriptorResource)) { + if (!isTexelBuffer) { // Not texel buffer; use image load instruction. // Build the intrinsic arguments. bool tfe = isa(intrinsicDataTy); @@ -509,11 +519,13 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags // Add a waterfall loop if needed. 
Value *result = imageInst; - if (flags & ImageFlagNonUniformImage) - result = createWaterfallLoop(imageInst, imageDescArgIndex, - getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); - else if (flags & ImageFlagEnforceReadFirstLaneImage) - enforceReadFirstLane(imageInst, imageDescArgIndex); + if (imageDesc->getType()->isVectorTy()) { + if (flags & ImageFlagNonUniformImage) + result = createWaterfallLoop(imageInst, imageDescArgIndex, + getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); + else if (flags & ImageFlagEnforceReadFirstLaneImage) + enforceReadFirstLane(imageInst, imageDescArgIndex); + } if (texelTy != origTexelTy) { Value *texel = result; @@ -581,6 +593,8 @@ Value *BuilderImpl::CreateImageLoad(Type *resultTy, unsigned dim, unsigned flags // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageLoadWithFmask(Type *resultTy, unsigned dim, unsigned flags, Value *imageDesc, Value *fmaskDesc, Value *coord, Value *sampleNum, const Twine &instName) { + if (isa(imageDesc)) + return PoisonValue::get(resultTy); // Load texel from F-mask image. unsigned fmaskDim = dim; switch (dim) { @@ -596,7 +610,7 @@ Value *BuilderImpl::CreateImageLoadWithFmask(Type *resultTy, unsigned dim, unsig } // When the shadow table is disabled, we don't need to load F-mask descriptor - if (m_pipelineState->getOptions().enableFmask) { + if (m_pipelineState->getOptions().enableFmask && !isa(fmaskDesc)) { Value *fmaskTexel = CreateImageLoad(FixedVectorType::get(getInt32Ty(), 4), fmaskDim, flags, fmaskDesc, coord, nullptr, instName + ".fmaskload"); @@ -607,6 +621,11 @@ Value *BuilderImpl::CreateImageLoadWithFmask(Type *resultTy, unsigned dim, unsig calcSampleNum = CreateAnd(calcSampleNum, getInt32(15)); // Check whether the F-mask descriptor has a BUF_DATA_FORMAT_INVALID (0) format (dword[1].bit[20-25]). 
+ if (!fmaskDesc->getType()->isVectorTy()) { + auto callInst = cast(fmaskTexel); + unsigned argIdx = callInst->arg_size() == 5 ? 0 : callInst->arg_size() - 3; + fmaskDesc = callInst->getArgOperand(argIdx); + } Value *fmaskFormat = CreateExtractElement(fmaskDesc, 1); fmaskFormat = CreateAnd(fmaskFormat, getInt32(63 << 20)); Value *fmaskValidFormat = CreateICmpNE(fmaskFormat, getInt32(0)); @@ -634,6 +653,8 @@ Value *BuilderImpl::CreateImageLoadWithFmask(Type *resultTy, unsigned dim, unsig // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageStore(Value *texel, unsigned dim, unsigned flags, Value *imageDesc, Value *coord, Value *mipLevel, const Twine &instName) { + if (isa(imageDesc)) + return PoisonValue::get(texel->getType()); // Mark usage of images, to allow the compute workgroup reconfiguration optimization. getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->resourceWrite = true; assert(coord->getType()->getScalarType()->isIntegerTy(32)); @@ -658,11 +679,14 @@ Value *BuilderImpl::CreateImageStore(Value *texel, unsigned dim, unsigned flags, SmallVector derivatives; dim = prepareCoordinate(dim, coord, nullptr, nullptr, nullptr, coords, derivatives); + bool isTexelBuffer = (dim == Dim1DBuffer || dim == Dim1DArrayBuffer); + imageDesc = transformImageDesc(imageDesc, false, isTexelBuffer, texel->getType()); + Type *texelTy = texel->getType(); SmallVector args; Instruction *imageStore = nullptr; unsigned imageDescArgIndex = 0; - if (imageDesc->getType() == getDescTy(ResourceNodeType::DescriptorResource)) { + if (!isTexelBuffer) { // Not texel buffer; use image store instruction. // Build the intrinsic arguments. unsigned dmask = 1; @@ -717,12 +741,14 @@ Value *BuilderImpl::CreateImageStore(Value *texel, unsigned dim, unsigned flags, CreateIntrinsic(Intrinsic::amdgcn_struct_buffer_store_format, texel->getType(), args, nullptr, instName); } - // Add a waterfall loop if needed. 
- if (flags & ImageFlagNonUniformImage) - createWaterfallLoop(imageStore, imageDescArgIndex, - getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); - else if (flags & ImageFlagEnforceReadFirstLaneImage) - enforceReadFirstLane(imageStore, imageDescArgIndex); + if (imageDesc->getType()->isVectorTy()) { + // Add a waterfall loop if needed. + if (flags & ImageFlagNonUniformImage) + createWaterfallLoop(imageStore, imageDescArgIndex, + getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); + else if (flags & ImageFlagEnforceReadFirstLaneImage) + enforceReadFirstLane(imageStore, imageDescArgIndex); + } return imageStore; } @@ -805,6 +831,10 @@ Value *BuilderImpl::CreateImageSampleConvertYCbCr(Type *resultTy, unsigned dim, Value *imageDesc = imageDescArray; if (isa(imageDescArray->getType())) imageDesc = CreateExtractValue(imageDescArray, 0); + if (isa(imageDesc)) + imageDesc = PoisonValue::get(FixedVectorType::get(getInt32Ty(), 8)); + else + imageDesc = transformImageDesc(imageDesc, true, false, resultTy); imageDesc = fixImageDescForRead(imageDesc); YCbCrSampleInfo sampleInfoLuma = {resultTy, dim, flags, imageDesc, samplerDescLuma, address, instName.str(), true}; @@ -817,6 +847,10 @@ Value *BuilderImpl::CreateImageSampleConvertYCbCr(Type *resultTy, unsigned dim, // Set image descriptor for chroma channel for (unsigned planeIdx = 1; planeIdx < yCbCrMetaData.word1.planes; ++planeIdx) { imageDesc = CreateExtractValue(imageDescArray, planeIdx); + if (isa(imageDesc)) + imageDesc = PoisonValue::get(FixedVectorType::get(getInt32Ty(), 8)); + else + imageDesc = transformImageDesc(imageDesc, true, false, resultTy); imageDesc = fixImageDescForRead(imageDesc); YCbCrConverter.SetImgDescChroma(planeIdx, imageDesc); } @@ -843,6 +877,9 @@ Value *BuilderImpl::CreateImageSampleConvertYCbCr(Type *resultTy, unsigned dim, // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageGather(Type *resultTy, 
unsigned dim, unsigned flags, Value *imageDesc, Value *samplerDesc, ArrayRef address, const Twine &instName) { + if (isa(imageDesc) || isa(samplerDesc)) + return PoisonValue::get(resultTy); + Value *coord = address[ImageAddressIdxCoordinate]; assert(coord->getType()->getScalarType()->isFloatTy() || coord->getType()->getScalarType()->isHalfTy()); @@ -861,8 +898,7 @@ Value *BuilderImpl::CreateImageGather(Type *resultTy, unsigned dim, unsigned fla gatherTy = StructType::get(getContext(), {gatherTy, getInt32Ty()}); } - // Only the first 4 dwords are sampler descriptor, we need to extract these values under any condition - samplerDesc = CreateShuffleVector(samplerDesc, samplerDesc, ArrayRef{0, 1, 2, 3}); + samplerDesc = transformSamplerDesc(samplerDesc); if (m_pipelineState->getOptions().disableTruncCoordForGather) { samplerDesc = modifySamplerDescForGather(samplerDesc); @@ -930,7 +966,16 @@ Value *BuilderImpl::CreateImageGather(Type *resultTy, unsigned dim, unsigned fla Value *BuilderImpl::CreateImageSampleGather(Type *resultTy, unsigned dim, unsigned flags, Value *coord, Value *imageDesc, Value *samplerDesc, ArrayRef address, const Twine &instName, bool isSample) { - imageDesc = fixImageDescForRead(imageDesc); + if (isa(imageDesc) || isa(samplerDesc)) + return PoisonValue::get(resultTy); + + imageDesc = transformImageDesc(imageDesc, false, false, resultTy); + const bool isVecTyDesc = imageDesc->getType()->isVectorTy(); + if (isVecTyDesc) + imageDesc = fixImageDescForRead(imageDesc); + + samplerDesc = transformSamplerDesc(samplerDesc); + // Mark usage of images, to allow the compute workgroup reconfiguration optimization. getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->useImages = true; // Set up the mask of address components provided, for use in searching the intrinsic ID table @@ -1057,16 +1102,22 @@ Value *BuilderImpl::CreateImageSampleGather(Type *resultTy, unsigned dim, unsign // Add a waterfall loop if needed. 
SmallVector nonUniformArgIndexes; - if (flags & ImageFlagNonUniformImage) - nonUniformArgIndexes.push_back(imageDescArgIndex); - else if (flags & ImageFlagEnforceReadFirstLaneImage) - enforceReadFirstLane(imageOp, imageDescArgIndex); + if (imageDesc->getType()->isVectorTy()) { + if (flags & ImageFlagNonUniformImage) + nonUniformArgIndexes.push_back(imageDescArgIndex); + else if (flags & ImageFlagEnforceReadFirstLaneImage) + enforceReadFirstLane(imageOp, imageDescArgIndex); + } - const unsigned samplerDescArgIndex = imageDescArgIndex + 1; - if (flags & ImageFlagNonUniformSampler) - nonUniformArgIndexes.push_back(samplerDescArgIndex); - else if (flags & ImageFlagEnforceReadFirstLaneSampler) - enforceReadFirstLane(imageOp, samplerDescArgIndex); + if (samplerDesc->getType()->isVectorTy()) { + const unsigned samplerDescArgIndex = imageDescArgIndex + 1; + if (flags & ImageFlagNonUniformSampler) { + nonUniformArgIndexes.push_back(samplerDescArgIndex); + } else { + // TODO: Re-add the condition once backend fix the waterfall loop bug. 
+ enforceReadFirstLane(imageOp, samplerDescArgIndex); + } + } if (!nonUniformArgIndexes.empty()) imageOp = createWaterfallLoop(imageOp, nonUniformArgIndexes, @@ -1123,6 +1174,8 @@ Value *BuilderImpl::CreateImageAtomicCompareSwap(unsigned dim, unsigned flags, A Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, unsigned flags, AtomicOrdering ordering, Value *imageDesc, Value *coord, Value *inputValue, Value *comparatorValue, const Twine &instName) { + if (isa(imageDesc)) + return PoisonValue::get(inputValue->getType()); getPipelineState()->getShaderResourceUsage(m_shaderStage.value())->resourceWrite = true; assert(coord->getType()->getScalarType()->isIntegerTy(32)); coord = handleFragCoordViewIndex(coord, flags, dim); @@ -1131,7 +1184,7 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: case AtomicOrdering::SequentiallyConsistent: - CreateFence(AtomicOrdering::Release, SyncScope::System); + CreateFence(AtomicOrdering::Release, getContext().getOrInsertSyncScopeID("agent")); break; default: break; @@ -1142,10 +1195,13 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns SmallVector derivatives; dim = prepareCoordinate(dim, coord, nullptr, nullptr, nullptr, coords, derivatives); + bool isTexelBuffer = (dim == Dim1DBuffer || dim == Dim1DArrayBuffer); + imageDesc = transformImageDesc(imageDesc, false, isTexelBuffer, nullptr); + SmallVector args; Instruction *atomicInst = nullptr; unsigned imageDescArgIndex = 0; - if (imageDesc->getType() == getDescTy(ResourceNodeType::DescriptorResource)) { + if (!isTexelBuffer) { // Resource descriptor. Use the image atomic instruction. 
args.push_back(inputValue); if (atomicOp == AtomicOpCompareSwap) @@ -1176,18 +1232,20 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns atomicInst = CreateIntrinsic(StructBufferAtomicIntrinsicTable[atomicOp], inputValue->getType(), args, nullptr, instName); } - if (flags & ImageFlagNonUniformImage) - atomicInst = - createWaterfallLoop(atomicInst, imageDescArgIndex, - getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); - else if (flags & ImageFlagEnforceReadFirstLaneImage) - enforceReadFirstLane(atomicInst, imageDescArgIndex); + if (imageDesc->getType()->isVectorTy()) { + if (flags & ImageFlagNonUniformImage) + atomicInst = + createWaterfallLoop(atomicInst, imageDescArgIndex, + getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); + else if (flags & ImageFlagEnforceReadFirstLaneImage) + enforceReadFirstLane(atomicInst, imageDescArgIndex); + } switch (ordering) { case AtomicOrdering::Acquire: case AtomicOrdering::AcquireRelease: case AtomicOrdering::SequentiallyConsistent: - CreateFence(AtomicOrdering::Acquire, SyncScope::System); + CreateFence(AtomicOrdering::Acquire, getContext().getOrInsertSyncScopeID("agent")); break; default: break; @@ -1204,8 +1262,12 @@ Value *BuilderImpl::CreateImageAtomicCommon(unsigned atomicOp, unsigned dim, uns // @param imageDesc : Image descriptor or texel buffer descriptor // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageQueryLevels(unsigned dim, unsigned flags, Value *imageDesc, const Twine &instName) { + if (isa(imageDesc)) + return PoisonValue::get(getInt32Ty()); dim = dim == DimCubeArray ? 
DimCube : dim; + imageDesc = transformImageDesc(imageDesc, true, false, nullptr); + Value *numMipLevel = nullptr; GfxIpVersion gfxIp = getPipelineState()->getTargetInfo().getGfxIpVersion(); SqImgRsrcRegHandler proxySqRsrcRegHelper(this, imageDesc, &gfxIp); @@ -1242,6 +1304,10 @@ Value *BuilderImpl::CreateImageQueryLevels(unsigned dim, unsigned flags, Value * // @param imageDesc : Image descriptor or texel buffer descriptor // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageQuerySamples(unsigned dim, unsigned flags, Value *imageDesc, const Twine &instName) { + if (isa(imageDesc)) + return PoisonValue::get(getInt32Ty()); + + imageDesc = transformImageDesc(imageDesc, true, false, nullptr); Value *descWord3 = CreateExtractElement(imageDesc, 3); Value *lastLevel = nullptr; if (m_pipelineState->getTargetInfo().getGfxIpVersion().major <= 11) { @@ -1299,7 +1365,11 @@ Value *BuilderImpl::CreateImageQuerySamples(unsigned dim, unsigned flags, Value // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageQuerySize(unsigned dim, unsigned flags, Value *imageDesc, Value *lod, const Twine &instName) { - if (imageDesc->getType() == getDescTy(ResourceNodeType::DescriptorTexelBuffer)) { + if (isa(imageDesc)) + return PoisonValue::get(getInt32Ty()); + bool isTexelBuffer = (dim == Dim1DBuffer || dim == Dim1DArrayBuffer); + imageDesc = transformImageDesc(imageDesc, true, isTexelBuffer, nullptr); + if (isTexelBuffer) { // Texel buffer. 
// Extract NUM_RECORDS (SQ_BUF_RSRC_WORD2) Value *numRecords = CreateExtractElement(imageDesc, 2); @@ -1396,6 +1466,9 @@ Value *BuilderImpl::CreateImageQuerySize(unsigned dim, unsigned flags, Value *im // @param instName : Name to give instruction(s) Value *BuilderImpl::CreateImageGetLod(unsigned dim, unsigned flags, Value *imageDesc, Value *samplerDesc, Value *coord, const Twine &instName) { + if (isa(imageDesc) || isa(samplerDesc)) + return PoisonValue::get(FixedVectorType::get(getFloatTy(), 2)); + // Remove array from dimension if any. switch (dim) { case Dim1DArray: @@ -1417,8 +1490,13 @@ Value *BuilderImpl::CreateImageGetLod(unsigned dim, unsigned flags, Value *image SmallVector derivatives; dim = prepareCoordinate(dim, coord, nullptr, nullptr, nullptr, coords, derivatives); - // Only the first 4 dwords are sampler descriptor, we need to extract these values under any condition - samplerDesc = CreateShuffleVector(samplerDesc, samplerDesc, ArrayRef{0, 1, 2, 3}); + imageDesc = transformImageDesc(imageDesc, false, false, nullptr); + if (isa(samplerDesc->getType())) { + // Only the first 4 dwords are sampler descriptor, we need to extract these values under any condition + samplerDesc = CreateShuffleVector(samplerDesc, ArrayRef{0, 1, 2, 3}); + } else { + samplerDesc = transformSamplerDesc(samplerDesc); + } SmallVector args; args.push_back(getInt32(3)); // dmask @@ -1432,19 +1510,23 @@ Value *BuilderImpl::CreateImageGetLod(unsigned dim, unsigned flags, Value *image Instruction *result = CreateIntrinsic(ImageGetLodIntrinsicTable[dim], {FixedVectorType::get(getFloatTy(), 2), getFloatTy()}, args, nullptr, instName); - // Add a waterfall loop if needed. 
- SmallVector nonUniformArgIndexes; - if (flags & ImageFlagNonUniformImage) - nonUniformArgIndexes.push_back(imageDescArgIndex); - else if (flags & ImageFlagEnforceReadFirstLaneImage) - enforceReadFirstLane(result, imageDescArgIndex); - const unsigned samplerDescArgIndex = imageDescArgIndex + 1; - if (flags & ImageFlagNonUniformSampler) - nonUniformArgIndexes.push_back(samplerDescArgIndex); - else if (flags & ImageFlagEnforceReadFirstLaneSampler) - enforceReadFirstLane(result, samplerDescArgIndex); + SmallVector nonUniformArgIndexes; + if (imageDesc->getType()->isVectorTy()) { + // Add a waterfall loop if needed. + if (flags & ImageFlagNonUniformImage) + nonUniformArgIndexes.push_back(imageDescArgIndex); + else if (flags & ImageFlagEnforceReadFirstLaneImage) + enforceReadFirstLane(result, imageDescArgIndex); + } + if (samplerDesc->getType()->isVectorTy()) { + const unsigned samplerDescArgIndex = imageDescArgIndex + 1; + if (flags & ImageFlagNonUniformSampler) + nonUniformArgIndexes.push_back(samplerDescArgIndex); + else if (flags & ImageFlagEnforceReadFirstLaneSampler) + enforceReadFirstLane(result, samplerDescArgIndex); + } if (!nonUniformArgIndexes.empty()) result = createWaterfallLoop(result, nonUniformArgIndexes, getPipelineState()->getShaderOptions(m_shaderStage.value()).scalarizeWaterfallLoads); @@ -1911,3 +1993,40 @@ Value *BuilderImpl::modifySamplerDescForGather(Value *samplerDesc) { return samplerDesc; } + +// ===================================================================================================================== +// Transform image descriptor pointer to a i32 type or a descriptor load instruction. 
+// +// @param imageDesc : image descriptor pointer +// @param mustLoad : Whether to load image descriptor from the pointer +// @param isTexelBuffer : Whether it is a texel buffer +// @param texelType : The type of the texel +// @returns The transformed descriptor +Value *BuilderImpl::transformImageDesc(Value *imageDesc, bool mustLoad, bool isTexelBuffer, Type *texelType) { + assert(!isa(imageDesc)); + if (isa(imageDesc->getType())) + return imageDesc; + + // Explicitly load the descriptor from the descriptor pointer + Type *descType = FixedVectorType::get(getInt32Ty(), isTexelBuffer ? 4 : 8); + Value *desc = CreateLoad(descType, imageDesc); + cast(desc)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(getContext(), {})); + return desc; +} + +// ===================================================================================================================== +// Transform sampler descriptor pointer to a i32 type or a descriptor load instruction. +// +// @param samplerDesc : descriptor pointer or a full descriptor +// @returns Transformed sampler descriptor +Value *BuilderImpl::transformSamplerDesc(Value *samplerDesc) { + assert(!isa(samplerDesc)); + if (isa(samplerDesc->getType())) + return samplerDesc; + + // Explicitly load the descriptor from the descriptor pointer + Type *descType = FixedVectorType::get(getInt32Ty(), 4); + Value *desc = CreateLoad(descType, samplerDesc); + cast(desc)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(getContext(), {})); + return desc; +} diff --git a/lgc/builder/InOutBuilder.cpp b/lgc/builder/InOutBuilder.cpp index 4172a0aa6b..14cd7b5032 100644 --- a/lgc/builder/InOutBuilder.cpp +++ b/lgc/builder/InOutBuilder.cpp @@ -430,8 +430,8 @@ void BuilderImpl::markGenericInputOutputUsage(bool isOutput, unsigned location, if (isOutput) { // Keep all locations if the next stage of the output is fragment shader or is unspecified if (m_shaderStage != ShaderStage::Fragment) { - ShaderStageEnum nextStage = 
m_pipelineState->getNextShaderStage(m_shaderStage.value()); - keepAllLocations = nextStage == ShaderStage::Fragment || nextStage == ShaderStage::Invalid; + auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage.value()); + keepAllLocations = nextStage == ShaderStage::Fragment || !nextStage; } } else { // Keep all locations if it is the input of fragment shader diff --git a/lgc/builder/SubgroupBuilder.cpp b/lgc/builder/SubgroupBuilder.cpp index 3a74d5bc01..c62acd661c 100644 --- a/lgc/builder/SubgroupBuilder.cpp +++ b/lgc/builder/SubgroupBuilder.cpp @@ -79,31 +79,27 @@ unsigned BuilderImpl::getShaderWaveSize() { // // @param instName : Name to give final instruction. Value *SubgroupBuilder::CreateSubgroupElect(const Twine &instName) { - bool excludeHelperLanes = false; - if (getShaderStage(GetInsertBlock()->getParent()).value() == ShaderStage::Fragment) - excludeHelperLanes = m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsExcludeHelperLanes; - return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue(), excludeHelperLanes)), getInt32(0)); + auto shaderStage = getShaderStage(GetInsertBlock()->getParent()); + return CreateICmpEQ(CreateSubgroupMbcnt(createGroupBallot(getTrue(), shaderStage.value())), getInt32(0)); } // ===================================================================================================================== // Create a subgroup all call. // // @param value : The value to compare across the subgroup. Must be an integer type. +// @param shaderStage : shader stage enum. // @param instName : Name to give final instruction. 
-Value *SubgroupBuilder::CreateSubgroupAll(Value *const value, const Twine &instName) { - bool ballotExcludeHelperLanes = false; +Value *SubgroupBuilder::createSubgroupAll(Value *const value, ShaderStageEnum shaderStage, const Twine &instName) { bool includeHelperLanes = false; bool requireHelperLanes = false; - if (getShaderStage(GetInsertBlock()->getParent()).value() == ShaderStage::Fragment) { + if (shaderStage == ShaderStage::Fragment) { const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - ballotExcludeHelperLanes = fragmentMode.waveOpsExcludeHelperLanes; includeHelperLanes = !fragmentMode.waveOpsExcludeHelperLanes; requireHelperLanes = fragmentMode.waveOpsRequireHelperLanes; } - Value *result = CreateICmpEQ(createGroupBallot(value, ballotExcludeHelperLanes), - createGroupBallot(getTrue(), ballotExcludeHelperLanes)); + Value *result = CreateICmpEQ(createGroupBallot(value, shaderStage), createGroupBallot(getTrue(), shaderStage)); result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); // Helper invocations of whole quad mode should be included in the subgroup vote execution @@ -122,18 +118,18 @@ Value *SubgroupBuilder::CreateSubgroupAll(Value *const value, const Twine &instN // @param value : The value to compare across the subgroup. Must be an integer type. // @param instName : Name to give final instruction. 
Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, const Twine &instName) { - bool ballotExcludeHelperLanes = false; + auto shaderStage = getShaderStage(GetInsertBlock()->getParent()); + bool includeHelperLanes = false; bool requireHelperLanes = false; if (getShaderStage(GetInsertBlock()->getParent()).value() == ShaderStage::Fragment) { const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); - ballotExcludeHelperLanes = fragmentMode.waveOpsExcludeHelperLanes; includeHelperLanes = !fragmentMode.waveOpsExcludeHelperLanes; requireHelperLanes = fragmentMode.waveOpsRequireHelperLanes; } - Value *result = CreateICmpNE(createGroupBallot(value, ballotExcludeHelperLanes), getInt64(0)); + Value *result = CreateICmpNE(createGroupBallot(value, shaderStage.value()), getInt64(0)); result = CreateSelect(CreateUnaryIntrinsic(Intrinsic::is_constant, value), value, result); // Helper invocations of whole quad mode should be included in the subgroup vote execution @@ -152,9 +148,11 @@ Value *SubgroupBuilder::CreateSubgroupAny(Value *const value, const Twine &instN // @param value : The value to compare across the subgroup. Must be an integer type. // @param instName : Name to give final instruction. 
Value *SubgroupBuilder::CreateSubgroupAllEqual(Value *const value, const Twine &instName) { + auto shaderStage = getShaderStage(GetInsertBlock()->getParent()).value(); + Type *const type = value->getType(); - Value *compare = CreateSubgroupBroadcastFirst(value, instName); + Value *compare = createSubgroupBroadcastFirst(value, shaderStage, instName); if (type->isFPOrFPVectorTy()) compare = CreateFCmpOEQ(compare, value); @@ -169,9 +167,9 @@ Value *SubgroupBuilder::CreateSubgroupAllEqual(Value *const value, const Twine & for (unsigned i = 1, compCount = cast(type)->getNumElements(); i < compCount; i++) result = CreateAnd(result, CreateExtractElement(compare, i)); - return CreateSubgroupAll(result, instName); + return createSubgroupAll(result, shaderStage, instName); } - return CreateSubgroupAll(compare, instName); + return createSubgroupAll(compare, shaderStage, instName); } // ===================================================================================================================== @@ -183,6 +181,8 @@ Value *SubgroupBuilder::CreateSubgroupAllEqual(Value *const value, const Twine & // @param instName : Name to give final instruction. Value *SubgroupBuilder::CreateSubgroupRotate(Value *const value, Value *const delta, Value *const clusterSize, const Twine &instName) { + auto shaderStage = getShaderStage(GetInsertBlock()->getParent()).value(); + // LocalId = SubgroupLocalInvocationId // RotationGroupSize = hasClusterSIze? ClusterSize : SubgroupSize. 
// Invocation ID = ((LocalId + Delta) & (RotationGroupSize - 1)) + (LocalId & ~(RotationGroupSize - 1)) @@ -194,7 +194,7 @@ Value *SubgroupBuilder::CreateSubgroupRotate(Value *const value, Value *const de CreateOr(CreateAnd(invocationId, rotationGroupSize), CreateAnd(localId, CreateNot(rotationGroupSize))); } - return CreateSubgroupShuffle(value, invocationId, instName); + return createSubgroupShuffle(value, invocationId, shaderStage, instName); } // ===================================================================================================================== @@ -232,12 +232,14 @@ Value *BuilderImpl::CreateSubgroupBroadcastWaterfall(Value *const value, Value * // Create a subgroup broadcastfirst call. // // @param value : The value to read from the first active lane into all other active lanes. +// @param shaderStage : shader stage enum. // @param instName : Name to give final instruction. -Value *BuilderImpl::CreateSubgroupBroadcastFirst(Value *const value, const Twine &instName) { - const auto &fragmentMode = m_pipelineState->getShaderModes()->getFragmentShaderMode(); +Value *BuilderImpl::createSubgroupBroadcastFirst(Value *const value, ShaderStageEnum shaderStage, + const Twine &instName) { // For waveOpsExcludeHelperLanes mode, we need filter out the helperlane and use readlane instead. - if (m_shaderStage == ShaderStage::Fragment && fragmentMode.waveOpsExcludeHelperLanes) { - Value *ballot = createGroupBallot(getTrue()); + if (shaderStage == ShaderStage::Fragment && + m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsExcludeHelperLanes) { + Value *ballot = createGroupBallot(getTrue(), shaderStage); Value *firstlane = CreateIntrinsic(Intrinsic::cttz, getInt64Ty(), {ballot, getTrue()}); firstlane = CreateTrunc(firstlane, getInt32Ty()); @@ -384,10 +386,12 @@ Value *BuilderImpl::CreateSubgroupBallotFindMsb(Value *const value, const Twine // // @param value : The value to shuffle. // @param index : The index to shuffle from. 
+// @param shaderStage : shader stage enum. // @param instName : Name to give final instruction. -Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index, const Twine &instName) { +Value *BuilderImpl::createSubgroupShuffle(Value *const value, Value *const index, ShaderStageEnum shaderStage, + const Twine &instName) { - if (supportWaveWideBPermute()) { + if (supportWaveWideBPermute(shaderStage)) { auto mapFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { return builder.CreateIntrinsic(Intrinsic::amdgcn_ds_bpermute, {}, {passthroughArgs[0], mappedArgs[0]}); @@ -398,7 +402,7 @@ Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index } if (supportPermLane64Dpp()) { - assert(getShaderWaveSize() == 64); + assert(getPipelineState()->getShaderWaveSize(shaderStage) == 64); // Start the WWM section by setting the inactive lanes. Value *const poisonValue = PoisonValue::get(value->getType()); @@ -431,9 +435,9 @@ Value *BuilderImpl::CreateSubgroupShuffle(Value *const value, Value *const index auto result = CreateSelect(indexInSameHalf, bPermSameHalf, bPermOtherHalf); // If required, force inputs of the operation to be computed in WQM. - if (m_shaderStage == ShaderStage::Fragment && + if (shaderStage == ShaderStage::Fragment && m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsRequireHelperLanes) - result = createWqm(result); + result = createWqm(result, shaderStage); return result; } @@ -630,12 +634,15 @@ Value *BuilderImpl::CreateSubgroupClusteredReduction(GroupArithOp groupArithOp, // Use a permute lane to cross rows (row 1 <-> row 0, row 3 <-> row 2). 
result = createGroupArithmeticOperation(groupArithOp, result, createPermLaneX16(result, result, UINT32_MAX, UINT32_MAX, true, false)); + if (waveSize == 32) + result = createReadFirstLane(result); } if (clusterSize == 64) { assert(waveSize == 64); if (supportPermLane64Dpp()) { result = createGroupArithmeticOperation(groupArithOp, result, createPermLane64(result)); + result = createReadFirstLane(result); } else { Value *const broadcast31 = CreateSubgroupBroadcast(result, getInt32(31), instName); Value *const broadcast63 = CreateSubgroupBroadcast(result, getInt32(63), instName); @@ -1303,6 +1310,18 @@ Value *BuilderImpl::createPermLane64(Value *const updateValue) { return CreateMapToSimpleType(mapFunc, updateValue, {}); } +// ===================================================================================================================== +// Create a call to get the first lane. +// +// @param updateValue : The value to update with. +Value *BuilderImpl::createReadFirstLane(Value *const updateValue) { + auto mapFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef passthroughArgs) -> Value * { + return builder.CreateIntrinsic(builder.getInt32Ty(), Intrinsic::amdgcn_readfirstlane, {mappedArgs[0]}); + }; + + return CreateMapToSimpleType(mapFunc, updateValue, {}); +} + // ===================================================================================================================== // Create a call to ds swizzle. // @@ -1333,12 +1352,13 @@ Value *BuilderImpl::createWwm(Value *const value) { // Only in fragment shader stage. // // @param value : The value to pass to the soft WQM call. -Value *BuilderImpl::createWqm(Value *const value) { +// @param shaderStage : shader stage enum. 
+Value *BuilderImpl::createWqm(Value *const value, ShaderStageEnum shaderStage) { auto mapFunc = [](BuilderBase &builder, ArrayRef mappedArgs, ArrayRef) -> Value * { return builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_wqm, mappedArgs[0]); }; - if (m_shaderStage == ShaderStage::Fragment) + if (shaderStage == ShaderStage::Fragment) return CreateMapToSimpleType(mapFunc, value, {}); return value; @@ -1398,15 +1418,16 @@ Value *BuilderImpl::createThreadMaskedSelect(Value *const threadMask, uint64_t a // Do group ballot, turning a per-lane boolean value (in a VGPR) into a subgroup-wide shared SGPR. // // @param value : The value to contribute to the SGPR, must be an boolean type. -// @param excludeHelperLanes : exclude helper lanes. -Value *BuilderImpl::createGroupBallot(Value *const value, bool excludeHelperLanes) { +// @param shaderStage : shader stage enum. +Value *BuilderImpl::createGroupBallot(Value *const value, ShaderStageEnum shaderStage) { // Check the type is definitely an boolean. assert(value->getType()->isIntegerTy(1)); Value *result = value; // For waveOpsExcludeHelperLanes mode, we need mask away the helperlane. - if (excludeHelperLanes) { + if (shaderStage == ShaderStage::Fragment && + m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsExcludeHelperLanes) { auto isLive = CreateIntrinsic(Intrinsic::amdgcn_live_mask, {}, {}, nullptr, {}); result = CreateAnd(isLive, result); } @@ -1426,11 +1447,7 @@ Value *BuilderImpl::createGroupBallot(Value *const value, bool excludeHelperLane // // @param value : The value to contribute to the SGPR, must be an boolean type. Value *BuilderImpl::createGroupBallot(Value *const value) { - // For waveOpsExcludeHelperLanes mode, we need mask away the helperlane. 
- bool excludeHelperLanes = false; - if (m_shaderStage == ShaderStage::Fragment) - excludeHelperLanes = m_pipelineState->getShaderModes()->getFragmentShaderMode().waveOpsExcludeHelperLanes; - return createGroupBallot(value, excludeHelperLanes); + return createGroupBallot(value, m_shaderStage.value()); } // ===================================================================================================================== diff --git a/lgc/include/lgc/builder/BuilderImpl.h b/lgc/include/lgc/builder/BuilderImpl.h index 98057d0670..652b7b920c 100644 --- a/lgc/include/lgc/builder/BuilderImpl.h +++ b/lgc/include/lgc/builder/BuilderImpl.h @@ -74,7 +74,7 @@ class BuilderImpl : public BuilderDefs { PipelineState *getPipelineState() const { return m_pipelineState; } // Get whether the context we are building in supports ds_bpermute or v_bpermute across all lanes in the wave. - bool supportWaveWideBPermute() const; + bool supportWaveWideBPermute(ShaderStageEnum shaderStage) const; // Get whether the context we are building in supports permute lane 64 DPP operations. bool supportPermLane64Dpp() const; @@ -441,6 +441,12 @@ class BuilderImpl : public BuilderDefs { // Modify sampler descriptor to force set trunc_coord as 0 for gather4 instruction. llvm::Value *modifySamplerDescForGather(llvm::Value *samplerDesc); + // Transform 32-bit image descriptor pointer to a i32 type or a descriptor load instruction. + llvm::Value *transformImageDesc(llvm::Value *imageDesc, bool mustLoad, bool isTexelBuffer, llvm::Type *texelType); + + // Transform 32-bit sampler descriptor pointer to a i32 type or a descriptor load instruction. + llvm::Value *transformSamplerDesc(llvm::Value *samplerDesc); + enum ImgDataFormat { IMG_DATA_FORMAT_32 = 4, IMG_DATA_FORMAT_8_8_8_8 = 10, @@ -657,7 +663,9 @@ class BuilderImpl : public BuilderDefs { const llvm::Twine &instName = ""); // Create a subgroup broadcast first. 
- llvm::Value *CreateSubgroupBroadcastFirst(llvm::Value *const value, const llvm::Twine &instName = ""); + llvm::Value *CreateSubgroupBroadcastFirst(llvm::Value *const value, const llvm::Twine &instName = "") { + return createSubgroupBroadcastFirst(value, m_shaderStage.value(), instName); + } // Create a subgroup ballot. llvm::Value *CreateSubgroupBallot(llvm::Value *const value, const llvm::Twine &instName = ""); @@ -686,7 +694,9 @@ class BuilderImpl : public BuilderDefs { // Create a subgroup shuffle. llvm::Value *CreateSubgroupShuffle(llvm::Value *const value, llvm::Value *const index, - const llvm::Twine &instName = ""); + const llvm::Twine &instName = "") { + return createSubgroupShuffle(value, index, m_shaderStage.value(), instName); + } // Create a subgroup shuffle xor. llvm::Value *CreateSubgroupShuffleXor(llvm::Value *const value, llvm::Value *const mask, @@ -771,20 +781,28 @@ class BuilderImpl : public BuilderDefs { llvm::Value *createPermLaneX16(llvm::Value *const origValue, llvm::Value *const updateValue, unsigned selectBitsLow, unsigned selectBitsHigh, bool fetchInactive, bool boundCtrl); llvm::Value *createPermLane64(llvm::Value *const updateValue); + llvm::Value *createReadFirstLane(llvm::Value *const updateValue); llvm::Value *createDsSwizzle(llvm::Value *const value, uint16_t dsPattern); llvm::Value *createWwm(llvm::Value *const value); - llvm::Value *createWqm(llvm::Value *const value); + llvm::Value *createWqm(llvm::Value *const value) { return createWqm(value, m_shaderStage.value()); } llvm::Value *createThreadMask(); llvm::Value *createThreadMaskedSelect(llvm::Value *const threadMask, uint64_t andMask, llvm::Value *const value1, llvm::Value *const value2); uint16_t getDsSwizzleBitMode(uint8_t xorMask, uint8_t orMask, uint8_t andMask); uint16_t getDsSwizzleQuadMode(uint8_t lane0, uint8_t lane1, uint8_t lane2, uint8_t lane3); -protected: - llvm::Value *createGroupBallot(llvm::Value *const value, bool excludeHelperLanes); llvm::Value 
*createGroupBallot(llvm::Value *const value); + +protected: + // The subgroup operation with explicit shader stage as parameter. llvm::Value *createFindMsb(llvm::Value *const mask); + llvm::Value *createGroupBallot(llvm::Value *const value, ShaderStageEnum shaderStage); + llvm::Value *createSubgroupBroadcastFirst(llvm::Value *const value, ShaderStageEnum shaderStage, + const llvm::Twine &instName); + llvm::Value *createSubgroupShuffle(llvm::Value *const value, llvm::Value *const index, ShaderStageEnum shaderStage, + const llvm::Twine &instName); + llvm::Value *createWqm(llvm::Value *const value, ShaderStageEnum shaderStage); }; } // namespace lgc diff --git a/lgc/include/lgc/builder/SubgroupBuilder.h b/lgc/include/lgc/builder/SubgroupBuilder.h index afd0c82b0c..6497951021 100644 --- a/lgc/include/lgc/builder/SubgroupBuilder.h +++ b/lgc/include/lgc/builder/SubgroupBuilder.h @@ -59,7 +59,9 @@ class SubgroupBuilder : public BuilderImpl { // // @param value : The value to compare // @param instName : Name to give instruction(s) - llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName = ""); + llvm::Value *CreateSubgroupAll(llvm::Value *const value, const llvm::Twine &instName = "") { + return createSubgroupAll(value, getShaderStage(GetInsertBlock()->getParent()).value(), instName); + } // Create a subgroup all equal. // @@ -80,6 +82,9 @@ class SubgroupBuilder : public BuilderImpl { SubgroupBuilder() = delete; SubgroupBuilder(const SubgroupBuilder &) = delete; SubgroupBuilder &operator=(const SubgroupBuilder &) = delete; + + // The subgroup operation with explicit shader stage as parameter. 
+ llvm::Value *createSubgroupAll(llvm::Value *const value, ShaderStageEnum shaderStage, const llvm::Twine &instName); }; } // namespace lgc diff --git a/lgc/include/lgc/patch/LowerCooperativeMatrix.h b/lgc/include/lgc/patch/LowerCooperativeMatrix.h index 7deb5cbee9..e277d2e23b 100644 --- a/lgc/include/lgc/patch/LowerCooperativeMatrix.h +++ b/lgc/include/lgc/patch/LowerCooperativeMatrix.h @@ -212,9 +212,8 @@ class LowerCooperativeMatrix : public Patch, public llvm::PassInfoMixin { void setupElfsPrintfStrings(); llvm::DenseMap m_elfInfos; llvm::SmallVector m_toErase; + llvm::Value *m_debugPrintfBuffer = nullptr; PipelineState *m_pipelineState = nullptr; }; diff --git a/lgc/include/lgc/patch/SystemValues.h b/lgc/include/lgc/patch/SystemValues.h index 069ea1de16..3153bc3882 100644 --- a/lgc/include/lgc/patch/SystemValues.h +++ b/lgc/include/lgc/patch/SystemValues.h @@ -92,6 +92,9 @@ class ShaderSystemValues { // Get pointers to emit counters (GS) std::pair> getEmitCounterPtr(); + // Get pointer to total emit counter (GS) + llvm::Value *getTotalEmitCounterPtr(); + // Get global internal table pointer as pointer to i8. 
llvm::Instruction *getInternalGlobalTablePtr(); @@ -141,6 +144,7 @@ class ShaderSystemValues { llvm::Value *m_tessCoord = nullptr; // Tessellated coordinate (TES) llvm::Value *m_esGsOffsets = nullptr; // ES -> GS offsets (GS in) llvm::SmallVector m_emitCounterPtrs; // Pointers to emit counters (GS) + llvm::Value *m_totalEmitCounterPtr; // Pointer to total emit counter (GS) llvm::SmallVector m_descTablePtrs; // Descriptor table pointers llvm::SmallVector m_shadowDescTablePtrs; // Shadow descriptor table pointers diff --git a/lgc/include/lgc/state/AbiMetadata.h b/lgc/include/lgc/state/AbiMetadata.h index b687fa1576..a11fa5bef9 100644 --- a/lgc/include/lgc/state/AbiMetadata.h +++ b/lgc/include/lgc/state/AbiMetadata.h @@ -34,6 +34,8 @@ */ #pragma once +#include "lgc/CommonDefs.h" +#include "llvm/Support/ErrorHandling.h" #include namespace lgc { @@ -622,8 +624,27 @@ typedef enum SPI_SHADER_EX_FORMAT { } SPI_SHADER_EX_FORMAT; // The names of API shader stages used in PAL metadata, in ShaderStageEnum order. -static const char *const ApiStageNames[] = {".task", ".vertex", ".hull", ".domain", - ".geometry", ".mesh", ".pixel", ".compute"}; +inline const char *shaderStageToApiName(ShaderStageEnum stage) { + switch (stage) { + case ShaderStage::Task: + return ".task"; + case ShaderStage::Vertex: + return ".vertex"; + case ShaderStage::TessControl: + return ".hull"; + case ShaderStage::TessEval: + return ".domain"; + case ShaderStage::Geometry: + return ".geometry"; + case ShaderStage::Mesh: + return ".mesh"; + case ShaderStage::Fragment: + return ".pixel"; + case ShaderStage::Compute: + return ".compute"; + } + llvm::report_fatal_error("No api name for this shader stage"); +} // The names of hardware shader stages used in PAL metadata, in Util::Abi::HardwareStage order. 
static const char *const HwStageNames[static_cast(Util::Abi::HardwareStage::Count)] = {".hs", ".gs", ".vs", diff --git a/lgc/include/lgc/state/PipelineState.h b/lgc/include/lgc/state/PipelineState.h index 9a2ad7bebd..9bc8bed882 100644 --- a/lgc/include/lgc/state/PipelineState.h +++ b/lgc/include/lgc/state/PipelineState.h @@ -41,6 +41,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include +#include namespace llvm { @@ -256,9 +257,9 @@ class PipelineState final : public Pipeline { bool hasShaderStage(ShaderStageEnum stage) { return getShaderStageMask().contains(stage); } bool isGraphics(); bool isComputeLibrary() const { return m_computeLibrary; } - ShaderStageEnum getLastVertexProcessingStage() const; - ShaderStageEnum getPrevShaderStage(ShaderStageEnum shaderStage) const; - ShaderStageEnum getNextShaderStage(ShaderStageEnum shaderStage) const; + std::optional getLastVertexProcessingStage() const; + std::optional getPrevShaderStage(ShaderStageEnum shaderStage) const; + std::optional getNextShaderStage(ShaderStageEnum shaderStage) const; // Get client name const char *getClient() const { return m_client.c_str(); } diff --git a/lgc/interface/lgc/Builder.h b/lgc/interface/lgc/Builder.h index f345b7010c..85ef8a388a 100644 --- a/lgc/interface/lgc/Builder.h +++ b/lgc/interface/lgc/Builder.h @@ -200,17 +200,19 @@ class BuilderDefs : public BuilderCommon { // Possible values for dimension argument for image methods. 
enum { - Dim1D = 0, // Coordinate: x - Dim2D = 1, // Coordinate: x, y - Dim3D = 2, // Coordinate: x, y, z - DimCube = 3, // Coordinate: x, y, face - Dim1DArray = 4, // Coordinate: x, slice - Dim2DArray = 5, // Coordinate: x, y, slice - Dim2DMsaa = 6, // Coordinate: x, y, fragid - Dim2DArrayMsaa = 7, // Coordinate: x, y, slice, fragid - DimCubeArray = 8, // Coordinate: x, y, face, slice (despite both SPIR-V and ISA - // combining face and slice into one component) - DimRect = 9, // Coordinate: x, y + Dim1D = 0, // Coordinate: x + Dim2D = 1, // Coordinate: x, y + Dim3D = 2, // Coordinate: x, y, z + DimCube = 3, // Coordinate: x, y, face + Dim1DArray = 4, // Coordinate: x, slice + Dim2DArray = 5, // Coordinate: x, y, slice + Dim2DMsaa = 6, // Coordinate: x, y, fragid + Dim2DArrayMsaa = 7, // Coordinate: x, y, slice, fragid + DimCubeArray = 8, // Coordinate: x, y, face, slice (despite both SPIR-V and ISA + // combining face and slice into one component) + DimRect = 9, // Coordinate: x, y + Dim1DBuffer = 10, // Coordinate: x (identify a texel buffer) + Dim1DArrayBuffer = 11, // Coordinate: x, slice (identify a texel buffer) }; // Get the number of coordinates for the specified dimension argument. 
@@ -219,6 +221,7 @@ class BuilderDefs : public BuilderCommon { static unsigned getImageNumCoords(unsigned dim) { switch (dim) { case Dim1D: + case Dim1DBuffer: return 1; case Dim2D: return 2; @@ -227,6 +230,7 @@ class BuilderDefs : public BuilderCommon { case DimCube: return 3; case Dim1DArray: + case Dim1DArrayBuffer: return 2; case Dim2DArray: return 3; @@ -249,6 +253,7 @@ class BuilderDefs : public BuilderCommon { static unsigned getImageQuerySizeComponentCount(unsigned dim) { switch (dim) { case Dim1D: + case Dim1DBuffer: return 1; case Dim2D: return 2; @@ -257,6 +262,7 @@ class BuilderDefs : public BuilderCommon { case DimCube: return 2; case Dim1DArray: + case Dim1DArrayBuffer: return 2; case Dim2DArray: return 3; @@ -279,6 +285,7 @@ class BuilderDefs : public BuilderCommon { static unsigned getImageDerivativeComponentCount(unsigned dim) { switch (dim) { case Dim1D: + case Dim1DBuffer: return 1; case Dim2D: return 2; @@ -287,6 +294,7 @@ class BuilderDefs : public BuilderCommon { case DimCube: return 3; case Dim1DArray: + case Dim1DArrayBuffer: return 1; case Dim2DArray: return 2; diff --git a/lgc/interface/lgc/CommonDefs.h b/lgc/interface/lgc/CommonDefs.h index 1cace83670..39af5ff976 100644 --- a/lgc/interface/lgc/CommonDefs.h +++ b/lgc/interface/lgc/CommonDefs.h @@ -31,6 +31,7 @@ #pragma once #include "EnumIterator.h" +#include "llvm/ADT/DenseMap.h" #include #include @@ -63,6 +64,34 @@ enum ShaderStage : unsigned { // TODO Temporary definition until ShaderStage is converted to a class enum. using ShaderStageEnum = ShaderStage::ShaderStage; +/// All shader stages +[[maybe_unused]] constexpr const std::array ShaderStages = { + ShaderStage::Compute, ShaderStage::Fragment, ShaderStage::Vertex, + ShaderStage::Geometry, ShaderStage::TessControl, ShaderStage::TessEval, + ShaderStage::Task, ShaderStage::Mesh, ShaderStage::CopyShader, +}; + +/// All graphics shader stages. +/// These are in execution order. 
+[[maybe_unused]] constexpr const std::array ShaderStagesGraphics = { + ShaderStage::Task, ShaderStage::Vertex, ShaderStage::TessControl, ShaderStage::TessEval, + ShaderStage::Geometry, ShaderStage::Mesh, ShaderStage::Fragment, +}; + +/// Graphics and compute shader stages. +/// The graphics stages are in execution order. +[[maybe_unused]] constexpr const std::array ShaderStagesNative = { + ShaderStage::Task, ShaderStage::Vertex, ShaderStage::TessControl, ShaderStage::TessEval, + ShaderStage::Geometry, ShaderStage::Mesh, ShaderStage::Fragment, ShaderStage::Compute, +}; + +/// Graphics and compute shader stages and copy shader. +/// The graphics stages are in execution order. +[[maybe_unused]] constexpr const std::array ShaderStagesNativeCopy = { + ShaderStage::Task, ShaderStage::Vertex, ShaderStage::TessControl, ShaderStage::TessEval, ShaderStage::Geometry, + ShaderStage::Mesh, ShaderStage::Fragment, ShaderStage::Compute, ShaderStage::CopyShader, +}; + class ShaderStageMask { public: constexpr ShaderStageMask() {} @@ -77,6 +106,11 @@ class ShaderStageMask { *this |= ShaderStageMask(stage); }; + template constexpr explicit ShaderStageMask(const std::array &stages) { + for (auto stage : stages) + *this |= ShaderStageMask(stage); + }; + constexpr static ShaderStageMask fromRaw(uint32_t mask) { ShaderStageMask result; result.m_value = mask; @@ -98,6 +132,7 @@ class ShaderStageMask { constexpr bool contains(ShaderStageEnum stage) const; constexpr bool contains_any(std::initializer_list stages) const; + template constexpr bool contains_any(const std::array &stages) const; constexpr bool empty() const { return m_value == 0; } uint32_t m_value = 0; @@ -133,6 +168,10 @@ constexpr bool ShaderStageMask::contains_any(std::initializer_list constexpr bool ShaderStageMask::contains_any(const std::array &stages) const { + return (*this & ShaderStageMask(stages)).m_value != 0; +} + enum AddrSpace { ADDR_SPACE_FLAT = 0, // Flat memory ADDR_SPACE_GLOBAL = 1, // Global memory @@ 
-202,4 +241,13 @@ namespace llvm { // Enable iteration over resource node type with `lgc::enumRange()`. LGC_DEFINE_DEFAULT_ITERABLE_ENUM(lgc::ResourceNodeType); +template <> struct DenseMapInfo { + using T = lgc::ShaderStageEnum; + + static T getEmptyKey() { return static_cast(DenseMapInfo::getEmptyKey()); } + static T getTombstoneKey() { return static_cast(DenseMapInfo::getTombstoneKey()); } + static unsigned getHashValue(const T &Val) { return static_cast(Val); } + static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; } +}; + } // namespace llvm diff --git a/lgc/interface/lgc/LgcDialect.td b/lgc/interface/lgc/LgcDialect.td index 7dbfd25551..2f80b43bf9 100644 --- a/lgc/interface/lgc/LgcDialect.td +++ b/lgc/interface/lgc/LgcDialect.td @@ -163,7 +163,7 @@ def LoadStridedBufferDescOp : LgcOp<"load.strided.buffer.desc", [Memory<[]>, Wil } def DebugPrintfOp : LgcOp<"debug.printf", [Memory<[(readwrite InaccessibleMem)]>, WillReturn]> { - let arguments = (ins BufferPointer:$buffer, ConstantPointer:$format, varargs:$args); + let arguments = (ins ImmutableStringAttr:$format, varargs:$args); let results = (outs); let summary = "print a formatted message"; diff --git a/lgc/interface/lgc/Pipeline.h b/lgc/interface/lgc/Pipeline.h index 1c30ffe785..7bdb636f6c 100644 --- a/lgc/interface/lgc/Pipeline.h +++ b/lgc/interface/lgc/Pipeline.h @@ -625,7 +625,7 @@ struct TessellationMode { }; // Kind of GS input primitives. 
-enum class InputPrimitives : unsigned { Points, Lines, LinesAdjacency, Triangles, TrianglesAdjacency }; +enum class InputPrimitives : unsigned { Points, Lines, LinesAdjacency, Triangles, TrianglesAdjacency, Patch }; // Kind of GS/mesh shader output primitives enum class OutputPrimitives : unsigned { @@ -645,6 +645,7 @@ struct GeometryShaderMode { OutputPrimitives outputPrimitive; // Kind of output primitives unsigned invocations; // Number of times to invoke shader for each input primitive unsigned outputVertices; // Max number of vertices the shader will emit in one invocation + unsigned controlPoints; // Number of control points when the input primitive is a patch unsigned robustGsEmits; // robust buffer access }; diff --git a/llpc/context/GfxRuntimeContext.h b/lgc/interface/lgc/RuntimeContext.h similarity index 91% rename from llpc/context/GfxRuntimeContext.h rename to lgc/interface/lgc/RuntimeContext.h index 83639d10d1..fcf50e4778 100644 --- a/llpc/context/GfxRuntimeContext.h +++ b/lgc/interface/lgc/RuntimeContext.h @@ -24,8 +24,8 @@ **********************************************************************************************************************/ /** *********************************************************************************************************************** - * @file GfxRuntimeContext.h - * @brief LLVMContext extension that stores a GfxRuntime library module + * @file RuntimeContext.h + * @brief LLVMContext extension that stores a Runtime library module *********************************************************************************************************************** */ #pragma once @@ -40,16 +40,16 @@ class Module; namespace lgc { // This extension can be attached to an LLVMContext and queried via the -// GfxRuntimeContext::get method inherited from the base class. +// RuntimeContext::get method inherited from the base class. 
// -// Compiler drivers (like LLPC) are expected to set theModule to the GfxRuntime +// Compiler drivers (like LLPC) are expected to set theModule to the Runtime // library, so that advanced blend pass can cross-module inline // functions implemented there. + class GfxRuntimeContext : public llvm_dialects::ContextExtensionImpl { public: explicit GfxRuntimeContext(llvm::LLVMContext &) {} - ~GfxRuntimeContext(); - + ~GfxRuntimeContext() = default; static Key theKey; std::unique_ptr theModule; }; diff --git a/lgc/patch/ConfigBuilderBase.cpp b/lgc/patch/ConfigBuilderBase.cpp index 2d2d57147b..9302cc39df 100644 --- a/lgc/patch/ConfigBuilderBase.cpp +++ b/lgc/patch/ConfigBuilderBase.cpp @@ -111,10 +111,11 @@ void ConfigBuilderBase::addApiHwShaderMapping(ShaderStageEnum apiStage, unsigned // Get the MsgPack map node for the specified API shader in the ".shaders" map // // @param apiStage : API shader stage -msgpack::MapDocNode ConfigBuilderBase::getApiShaderNode(unsigned apiStage) { +msgpack::MapDocNode ConfigBuilderBase::getApiShaderNode(ShaderStageEnum apiStage) { if (m_apiShaderNodes[apiStage].isEmpty()) { m_apiShaderNodes[apiStage] = - m_pipelineNode[Util::Abi::PipelineMetadataKey::Shaders].getMap(true)[ApiStageNames[apiStage]].getMap(true); + m_pipelineNode[Util::Abi::PipelineMetadataKey::Shaders].getMap(true)[shaderStageToApiName(apiStage)].getMap( + true); } return m_apiShaderNodes[apiStage]; } @@ -139,7 +140,7 @@ msgpack::MapDocNode ConfigBuilderBase::getHwShaderNode(Util::Abi::HardwareStage // @param apiStage : API shader stage unsigned ConfigBuilderBase::setShaderHash(ShaderStageEnum apiStage) { const ShaderOptions &shaderOptions = m_pipelineState->getShaderOptions(apiStage); - auto hashNode = getApiShaderNode(unsigned(apiStage))[Util::Abi::ShaderMetadataKey::ApiShaderHash].getArray(true); + auto hashNode = getApiShaderNode(apiStage)[Util::Abi::ShaderMetadataKey::ApiShaderHash].getArray(true); hashNode[0] = shaderOptions.hash[0]; hashNode[1] = 
shaderOptions.hash[1]; return shaderOptions.hash[0] >> 32 ^ shaderOptions.hash[0] ^ shaderOptions.hash[1] >> 32 ^ shaderOptions.hash[1]; @@ -314,7 +315,7 @@ void ConfigBuilderBase::setThreadgroupDimensions(llvm::ArrayRef values } // ===================================================================================================================== -// Set stream-out vertex strides (GFX11+) +// Set stream-out vertex strides // // @param values : Values to set void ConfigBuilderBase::setStreamOutVertexStrides(ArrayRef values) { diff --git a/lgc/patch/ConfigBuilderBase.h b/lgc/patch/ConfigBuilderBase.h index 84f030e6a0..31c6c57acb 100644 --- a/lgc/patch/ConfigBuilderBase.h +++ b/lgc/patch/ConfigBuilderBase.h @@ -126,11 +126,11 @@ class ConfigBuilderBase { private: // Get the MsgPack map node for the specified API shader in the ".shaders" map - llvm::msgpack::MapDocNode getApiShaderNode(unsigned apiStage); + llvm::msgpack::MapDocNode getApiShaderNode(ShaderStageEnum apiStage); llvm::msgpack::Document *m_document; // The MsgPack document llvm::msgpack::MapDocNode m_pipelineNode; // MsgPack map node for amdpal.pipelines[0] - llvm::msgpack::MapDocNode m_apiShaderNodes[ShaderStage::NativeStageCount]; + llvm::DenseMap m_apiShaderNodes; // MsgPack map node for each API shader's node in // ".shaders" llvm::msgpack::MapDocNode m_hwShaderNodes[unsigned(Util::Abi::HardwareStage::Count)]; diff --git a/lgc/patch/LowerCooperativeMatrix.cpp b/lgc/patch/LowerCooperativeMatrix.cpp index 5dc9181dcb..f5d20ea013 100644 --- a/lgc/patch/LowerCooperativeMatrix.cpp +++ b/lgc/patch/LowerCooperativeMatrix.cpp @@ -1060,6 +1060,7 @@ Value *LowerCooperativeMatrix::cooperativeMatrixReshapeBetween16bitAnd32bitOnAcc } else { resultValue = builder.CreateBitCast(resultValue, FixedVectorType::get(builder.getFloatTy(), 4)); // 1st case:after convert + resultValue = builder.CreateShuffleVector(resultValue, {0, 1, 2, 3, -1, -1, -1, -1}); } return resultValue; } @@ -1542,8 +1543,8 @@ void 
LowerCooperativeMatrix::visitCooperativeMatrixMulAddOp(CooperativeMatrixMul mulAB1 = createDotProductFp16Fp16(rowData1, colData, accumulator1, isSatOrOpsel, instName, &muladd); mulAB2 = createDotProductFp16Fp16(rowData2, colData, accumulator2, isSatOrOpsel, instName, &muladd); } else { - mulAB1 = createDotProductInt16Int16(rowData1, colData, accumulator1, flags, isSatOrOpsel, instName, &muladd); - mulAB2 = createDotProductInt16Int16(rowData2, colData, accumulator2, flags, isSatOrOpsel, instName, &muladd); + mulAB1 = createDotProductInt(rowData1, colData, accumulator1, flags, isSatOrOpsel, instName, &muladd); + mulAB2 = createDotProductInt(rowData2, colData, accumulator2, flags, isSatOrOpsel, instName, &muladd); } dotProductValue = builder.CreateInsertElement(dotProductValue, mulAB1, accIdx); dotProductValue = builder.CreateInsertElement(dotProductValue, mulAB2, accIdx + 1); @@ -1575,6 +1576,8 @@ Value *LowerCooperativeMatrix::createDotProductFp16Fp32(Value *const vector1, Va BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); + // Dot instructions are not available on gfx1010 + const bool emulateDot = m_gfxIp.isGfx(10, 1) && m_gfxIp.stepping == 0; const unsigned compCount = cast(vector1->getType())->getNumElements(); Value *scalar = initAccumulator; auto intrinsicDot = Intrinsic::amdgcn_fdot2; @@ -1583,8 +1586,18 @@ Value *LowerCooperativeMatrix::createDotProductFp16Fp32(Value *const vector1, Va input1 = builder.CreateBitCast(input1, FixedVectorType::get(builder.getHalfTy(), 2)); Value *input2 = builder.CreateExtractElement(vector2, i); input2 = builder.CreateBitCast(input2, FixedVectorType::get(builder.getHalfTy(), 2)); - scalar = - builder.CreateIntrinsic(intrinsicDot, {}, {input1, input2, scalar, builder.getInt1(isSat)}, nullptr, instName); + if (emulateDot) { + Value *input1Fp32 = builder.CreateFPCast(input1, FixedVectorType::get(builder.getFloatTy(), 2)); + Value *input2Fp32 = builder.CreateFPCast(input2, 
FixedVectorType::get(builder.getFloatTy(), 2)); + for (unsigned j = 0; j < 2; ++j) { + Value *lhs = builder.CreateExtractElement(input1Fp32, j); + Value *rhs = builder.CreateExtractElement(input2Fp32, j); + scalar = builder.CreateIntrinsic(Intrinsic::fmuladd, lhs->getType(), {lhs, rhs, scalar}); + } + } else { + scalar = builder.CreateIntrinsic(intrinsicDot, {}, {input1, input2, scalar, builder.getInt1(isSat)}, nullptr, + instName); + } } scalar->setName(instName); return scalar; @@ -1638,6 +1651,8 @@ Value *LowerCooperativeMatrix::createDotProductInt8Int32(Value *vector1, Value * BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); + // Dot instructions are not available on gfx1010 + const bool emulateDot = m_gfxIp.isGfx(10, 1) && m_gfxIp.stepping == 0; const bool isSigned = (flags & lgc::Builder::FirstVectorSigned); auto intrinsicDot = isSigned ? Intrinsic::amdgcn_sdot4 : Intrinsic::amdgcn_udot4; @@ -1646,8 +1661,14 @@ Value *LowerCooperativeMatrix::createDotProductInt8Int32(Value *vector1, Value * for (unsigned i = 0; i < compCount; ++i) { Value *input1 = builder.CreateExtractElement(vector1, i); Value *input2 = builder.CreateExtractElement(vector2, i); - scalar = - builder.CreateIntrinsic(intrinsicDot, {}, {input1, input2, scalar, builder.getInt1(false)}, nullptr, instName); + if (emulateDot) { + input1 = builder.CreateBitCast(input1, FixedVectorType::get(builder.getInt8Ty(), 4)); + input2 = builder.CreateBitCast(input2, FixedVectorType::get(builder.getInt8Ty(), 4)); + scalar = createDotProductInt(input1, input2, scalar, flags, isSat, instName, insertPos); + } else { + scalar = builder.CreateIntrinsic(intrinsicDot, {}, {input1, input2, scalar, builder.getInt1(false)}, nullptr, + instName); + } } // Always use sadd_sat here as uint32@C is not supported. 
@@ -1677,6 +1698,8 @@ Value *LowerCooperativeMatrix::createDotProductInt16Int32(Value *vector1, Value BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); + // Dot instructions are not available on gfx1010 + const bool emulateDot = m_gfxIp.isGfx(10, 1) && m_gfxIp.stepping == 0; const bool isSigned = (flags & lgc::Builder::FirstVectorSigned); auto intrinsicDot = isSigned ? Intrinsic::amdgcn_sdot2 : Intrinsic::amdgcn_udot2; @@ -1687,8 +1710,12 @@ Value *LowerCooperativeMatrix::createDotProductInt16Int32(Value *vector1, Value input1 = builder.CreateBitCast(input1, FixedVectorType::get(builder.getInt16Ty(), 2)); Value *input2 = builder.CreateExtractElement(vector2, i); input2 = builder.CreateBitCast(input2, FixedVectorType::get(builder.getInt16Ty(), 2)); - scalar = - builder.CreateIntrinsic(intrinsicDot, {}, {input1, input2, scalar, builder.getInt1(isSat)}, nullptr, instName); + if (emulateDot) { + scalar = createDotProductInt(input1, input2, scalar, flags, isSat, instName, insertPos); + } else { + scalar = builder.CreateIntrinsic(intrinsicDot, {}, {input1, input2, scalar, builder.getInt1(isSat)}, nullptr, + instName); + } } scalar->setName(instName); return scalar; @@ -1704,9 +1731,8 @@ Value *LowerCooperativeMatrix::createDotProductInt16Int32(Value *vector1, Value // @param isSat: SaturatingAccumulation for calculation // @param instName : Name to give instruction(s) // @param insertPos : Where to insert the instruction -Value *LowerCooperativeMatrix::createDotProductInt16Int16(Value *vector1, Value *vector2, Value *accumulator, - unsigned flags, bool isSat, const Twine &instName, - Instruction *insertPos) { +Value *LowerCooperativeMatrix::createDotProductInt(Value *vector1, Value *vector2, Value *accumulator, unsigned flags, + bool isSat, const Twine &instName, Instruction *insertPos) { BuilderBase builder(*m_context); builder.SetInsertPoint(insertPos); Type *inputTy = vector1->getType(); @@ -1720,9 +1746,13 @@ Value 
*LowerCooperativeMatrix::createDotProductInt16Int16(Value *vector1, Value // as unsigned. const bool isMixed = (flags == lgc::Builder::FirstVectorSigned); - Type *targetTy = builder.getInt64Ty(); + const auto outputSizeInBits = outputTy->getScalarSizeInBits(); + const auto compSizeInBits = inputTy->getScalarSizeInBits(); + Type *targetTy = compSizeInBits * 2 >= outputSizeInBits ? builder.getIntNTy(outputSizeInBits * 2) : outputTy; + const auto targetSizeInBits = targetTy->getScalarSizeInBits(); + assert(targetSizeInBits <= 64); // Emulate dot product with no HW support cases - Value *scalar = builder.getInt64(0); + Value *scalar = builder.getIntN(targetSizeInBits, 0); for (unsigned elemIdx = 0; elemIdx < compCount; ++elemIdx) { Value *elem1 = builder.CreateExtractElement(vector1, elemIdx); elem1 = isSigned ? builder.CreateSExt(elem1, targetTy) : builder.CreateZExt(elem1, targetTy); @@ -1732,28 +1762,27 @@ Value *LowerCooperativeMatrix::createDotProductInt16Int16(Value *vector1, Value scalar = builder.CreateAdd(product, scalar); } - scalar = builder.CreateTrunc(scalar, builder.getInt32Ty()); - accumulator = builder.CreateTrunc(accumulator, builder.getInt32Ty()); + scalar = builder.CreateTrunc(scalar, outputTy); + accumulator = builder.CreateTrunc(accumulator, outputTy); Intrinsic::ID addIntrinsic = isSigned ? 
Intrinsic::sadd_sat : Intrinsic::uadd_sat; scalar = builder.CreateBinaryIntrinsic(addIntrinsic, scalar, accumulator, nullptr, instName); - const unsigned bitWidth = outputTy->getScalarSizeInBits(); - auto unsignedMax = (2ULL << (bitWidth - 1)) - 1; + auto unsignedMax = (2ULL << (targetSizeInBits - 1)) - 1; auto signedMax = unsignedMax >> 1; auto signedMin = -1ULL - signedMax; Value *minimum = nullptr, *maximum = nullptr; Value *isUnderflow = nullptr, *isOverflow = nullptr; if (isSigned) { - scalar = builder.CreateSExt(scalar, builder.getInt64Ty()); - minimum = ConstantInt::getSigned(builder.getInt64Ty(), signedMin); - maximum = ConstantInt::getSigned(builder.getInt64Ty(), signedMax); + scalar = builder.CreateSExt(scalar, targetTy); + minimum = ConstantInt::getSigned(targetTy, signedMin); + maximum = ConstantInt::getSigned(targetTy, signedMax); isUnderflow = builder.CreateICmpSLT(scalar, minimum); isOverflow = builder.CreateICmpSGT(scalar, maximum); } else { - scalar = builder.CreateZExt(scalar, builder.getInt64Ty()); - minimum = builder.getInt64(0); - maximum = builder.getInt64(unsignedMax); + scalar = builder.CreateZExt(scalar, targetTy); + minimum = builder.getIntN(targetSizeInBits, 0); + maximum = builder.getIntN(targetSizeInBits, unsignedMax); isUnderflow = builder.CreateICmpULT(scalar, minimum); isOverflow = builder.CreateICmpUGT(scalar, maximum); } diff --git a/lgc/patch/LowerDebugPrintf.cpp b/lgc/patch/LowerDebugPrintf.cpp index 7de0ef8752..3ccc0d5862 100644 --- a/lgc/patch/LowerDebugPrintf.cpp +++ b/lgc/patch/LowerDebugPrintf.cpp @@ -30,20 +30,24 @@ */ #include "lgc/patch/LowerDebugPrintf.h" #include "lgc/LgcDialect.h" +#include "lgc/builder/BuilderImpl.h" #include "lgc/patch/Patch.h" #include "lgc/state/PalMetadata.h" #include "lgc/state/PipelineState.h" #include "llvm-dialects/Dialect/Visitor.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/SetVector.h" #include "llvm/BinaryFormat/MsgPackDocument.h" #include "llvm/InitializePasses.h" #include 
"llvm/Support/Debug.h" +#include #define DEBUG_TYPE "lower-debug-printf" using namespace llvm; using namespace lgc; +constexpr unsigned PrintfBufferBindingId = 6; namespace lgc { // ===================================================================================================================== @@ -57,10 +61,40 @@ PreservedAnalyses LowerDebugPrintf::run(Module &module, ModuleAnalysisManager &a PipelineState *pipelineState = analysisManager.getResult(module).getPipelineState(); m_pipelineState = pipelineState; - static const auto visitor = - llvm_dialects::VisitorBuilder().add(&LowerDebugPrintf::visitDebugPrintf).build(); + // Find the function which contains DebugPrintf dialect + typedef SmallSetVector FuncSet; + FuncSet printfFuncs; + static const auto debugPrintfFuncsVisitor = + llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add([](FuncSet &pfunc, auto &inst) { pfunc.insert(inst.getFunction()); }) + .build(); + debugPrintfFuncsVisitor.visit(printfFuncs, module); - visitor.visit(*this, module); + if (printfFuncs.empty()) + return PreservedAnalyses::all(); + + bool hasPrintfDesc = + pipelineState + ->findResourceNode(ResourceNodeType::DescriptorBuffer, InternalDescriptorSetId, PrintfBufferBindingId) + .second != nullptr; + + static const auto lowerDebugfPrintOpVisitor = llvm_dialects::VisitorBuilder() + .setStrategy(llvm_dialects::VisitorStrategy::ByFunctionDeclaration) + .add(&LowerDebugPrintf::visitDebugPrintf) + .build(); + + BuilderImpl builder(m_pipelineState); + for (auto func : printfFuncs) { + // Create printbuffer Descriptor at the beginning of the function which contains DebugPrintf dialect ops + builder.SetInsertPointPastAllocas(func); + m_debugPrintfBuffer = hasPrintfDesc + ? 
m_debugPrintfBuffer = builder.create(builder.CreateBufferDesc( + InternalDescriptorSetId, PrintfBufferBindingId, builder.getInt32(0), 2)) + : nullptr; + + lowerDebugfPrintOpVisitor.visit(*this, *func); + } for (auto inst : m_toErase) inst->eraseFromParent(); @@ -79,10 +113,11 @@ PreservedAnalyses LowerDebugPrintf::run(Module &module, ModuleAnalysisManager &a void LowerDebugPrintf::visitDebugPrintf(DebugPrintfOp &op) { m_toErase.push_back(&op); - Value *debugPrintfBuffer = op.getBuffer(); - if (isa(debugPrintfBuffer)) + if (!m_debugPrintfBuffer) return; + Value *debugPrintfBuffer = m_debugPrintfBuffer; + BuilderBase builder(&op); // Printf output variables in DWORDs @@ -94,8 +129,7 @@ void LowerDebugPrintf::visitDebugPrintf(DebugPrintfOp &op) { getDwordValues(var, printArgs, bit64Vector, builder); } - GlobalVariable *globalStr = cast(op.getFormat()); - StringRef strDebugStr = (cast(globalStr->getInitializer()))->getAsString(); + StringRef strDebugStr = op.getFormat(); uint64_t hash = hash_value(strDebugStr); diff --git a/lgc/patch/MeshTaskShader.cpp b/lgc/patch/MeshTaskShader.cpp index c1b259d425..6e47c76187 100644 --- a/lgc/patch/MeshTaskShader.cpp +++ b/lgc/patch/MeshTaskShader.cpp @@ -998,13 +998,14 @@ void MeshTaskShader::lowerEmitMeshTasks(EmitMeshTasksOp &emitMeshTasksOp) { auto emitMeshTasksBlock = checkEmitMeshTasksBlock->splitBasicBlock(emitMeshTasksCall, ".emitMeshTasks"); auto endEmitMeshTasksBlock = emitMeshTasksBlock->splitBasicBlock(emitMeshTasksCall, ".endEmitMeshTasks"); + SyncScope::ID agentScope = m_builder.getContext().getOrInsertSyncScopeID("agent"); // Modify ".checkEmitMeshTasks" block { m_builder.SetInsertPoint(checkEmitMeshTasksBlock->getTerminator()); if (m_accessTaskPayload) { // Make sure the task payload read/write access is completed - m_builder.CreateFence(AtomicOrdering::Release, SyncScope::System); + m_builder.CreateFence(AtomicOrdering::Release, agentScope); createBarrier(); } @@ -1043,7 +1044,7 @@ void 
MeshTaskShader::lowerEmitMeshTasks(EmitMeshTasksOp &emitMeshTasksOp) { valueToAdd = m_builder.CreateBitCast(valueToAdd, m_builder.getInt64Ty()); m_builder.CreateAtomicRMW(AtomicRMWInst::Add, meshPipeStatsBufEntryPtr, valueToAdd, MaybeAlign(), - AtomicOrdering::Monotonic, SyncScope::System); + AtomicOrdering::Monotonic, agentScope); } // @@ -2082,7 +2083,7 @@ void MeshTaskShader::exportVertex() { if (waAtmPrecedesPos) { // Before the first export call of vertex position data, add s_wait_vscnt 0 to make sure the completion of all // attributes being written to the attribute ring buffer - m_builder.CreateFence(AtomicOrdering::Release, SyncScope::System); + m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); doExport(ExportKind::Pos, posExports); } @@ -2101,6 +2102,7 @@ void MeshTaskShader::collectMeshStatsInfo(Function *entryPoint, Value *numMeshPr const uint64_t numMeshThreads = meshMode.workgroupSizeX * meshMode.workgroupSizeY * meshMode.workgroupSizeZ; Value *meshPipeStatsBufPtr = m_pipelineSysValues.get(entryPoint)->getMeshPipeStatsBufPtr(); + SyncScope::ID agentScope = m_builder.getContext().getOrInsertSyncScopeID("agent"); // // Record numMeshThreads @@ -2122,7 +2124,7 @@ void MeshTaskShader::collectMeshStatsInfo(Function *entryPoint, Value *numMeshPr valueToAdd = m_builder.CreateBitCast(valueToAdd, m_builder.getInt64Ty()); m_builder.CreateAtomicRMW(AtomicRMWInst::Add, meshPipeStatsBufEntryPtr, valueToAdd, MaybeAlign(), - AtomicOrdering::Monotonic, SyncScope::System); + AtomicOrdering::Monotonic, agentScope); } // @@ -2147,7 +2149,7 @@ void MeshTaskShader::collectMeshStatsInfo(Function *entryPoint, Value *numMeshPr valueToAdd = m_builder.CreateBitCast(valueToAdd, m_builder.getInt64Ty()); m_builder.CreateAtomicRMW(AtomicRMWInst::Add, meshPipeStatsBufEntryPtr, valueToAdd, MaybeAlign(), - AtomicOrdering::Monotonic, SyncScope::System); + AtomicOrdering::Monotonic, agentScope); } } diff --git 
a/lgc/patch/NggPrimShader.cpp b/lgc/patch/NggPrimShader.cpp index bca8196fef..24db846493 100644 --- a/lgc/patch/NggPrimShader.cpp +++ b/lgc/patch/NggPrimShader.cpp @@ -4068,12 +4068,13 @@ void NggPrimShader::writeGsOutput(Value *output, unsigned location, unsigned com const unsigned attribOffset = (location * 4) + component; auto ldsOffset = m_builder.CreateAdd(vertexOffset, m_builder.getInt32(attribOffset)); + IRBuilder<>::InsertPointGuard guard(m_builder); + + // Skip GS-VS ring write if the emit is invalid if (geometryMode.robustGsEmits) { - // skip the lds write by writing to a dummy offset. - // ldsOffset = (totalEmitVerts >= outputVertices) ? InvalidValue : ldsOffset - auto dummyOffset = m_builder.getInt32(0x80000000); - auto outOfRange = m_builder.CreateICmpUGE(totalEmitVerts, m_builder.getInt32(geometryMode.outputVertices)); - ldsOffset = m_builder.CreateSelect(outOfRange, dummyOffset, ldsOffset); + // validEmit = totalEmitVerts < outputVertices + auto validEmit = m_builder.CreateICmpULT(totalEmitVerts, m_builder.getInt32(geometryMode.outputVertices)); + m_builder.CreateIf(validEmit, false); } writeValueToLds(output, ldsOffset); @@ -4246,7 +4247,7 @@ Function *NggPrimShader::createGsEmitHandler() { totalEmitVerts = m_builder.CreateLoad(m_builder.getInt32Ty(), totalEmitVertsPtr); // totalEmitVerts++ totalEmitVerts = m_builder.CreateAdd(totalEmitVerts, m_builder.getInt32(1)); - // outVerts = (totalEmitVerts >= outputVertices) ? 0 : outVerts + // outVerts = (totalEmitVerts > outputVertices) ? 
0 : outVerts Value *outOfRange = m_builder.CreateICmpUGT(totalEmitVerts, m_builder.getInt32(geometryMode.outputVertices)); outVerts = m_builder.CreateSelect(outOfRange, m_builder.getInt32(0), outVerts); } @@ -6253,7 +6254,7 @@ void NggPrimShader::processVertexAttribExport(Function *&target) { // Before the first export call, add s_wait_vscnt 0 to make sure the completion of all attributes being written // to the attribute ring buffer m_builder.SetInsertPoint(exportCalls[0]); - m_builder.CreateFence(AtomicOrdering::Release, SyncScope::System); + m_builder.CreateFence(AtomicOrdering::Release, m_builder.getContext().getOrInsertSyncScopeID("agent")); } // Remove calls diff --git a/lgc/patch/NggPrimShader.h b/lgc/patch/NggPrimShader.h index 7332661439..71a8e27982 100644 --- a/lgc/patch/NggPrimShader.h +++ b/lgc/patch/NggPrimShader.h @@ -32,6 +32,7 @@ #include "lgc/state/PipelineState.h" #include "lgc/state/TargetInfo.h" +#include "lgc/util/BuilderBase.h" #include "llvm/IR/Module.h" namespace lgc { @@ -419,7 +420,7 @@ class NggPrimShader { VertexCullInfoOffsets m_vertCullInfoOffsets; // A collection of offsets within an item of vertex cull info StreamOutControlCbOffsets m_streamOutControlCbOffsets; // A collection of offsets within stream-out control buffer - llvm::IRBuilder<> m_builder; // LLVM IR builder + BuilderBase m_builder; // LLVM IR builder llvm::Constant *m_lds = nullptr; // Global variable to model primitive shader LDS PrimShaderLdsLayout m_ldsLayout; // Primitive shader LDS layout diff --git a/lgc/patch/Patch.cpp b/lgc/patch/Patch.cpp index 1c94ed3a4c..9e759fec1e 100644 --- a/lgc/patch/Patch.cpp +++ b/lgc/patch/Patch.cpp @@ -180,7 +180,6 @@ void Patch::addPasses(PipelineState *pipelineState, lgc::PassManager &passMgr, T } passMgr.addPass(IPSCCPPass()); - passMgr.addPass(LowerDebugPrintf()); passMgr.addPass(createModuleToFunctionPassAdaptor(CombineCooperativeMatrix())); // Lower the cooperative matrix @@ -201,6 +200,7 @@ void Patch::addPasses(PipelineState 
*pipelineState, lgc::PassManager &passMgr, T passMgr.addPass(PatchCopyShader()); passMgr.addPass(LowerVertexFetch()); passMgr.addPass(LowerFragColorExport()); + passMgr.addPass(LowerDebugPrintf()); passMgr.addPass(LowerDesc()); passMgr.addPass(PatchEntryPointMutate()); passMgr.addPass(PatchInitializeWorkgroupMemory()); diff --git a/lgc/patch/PatchEntryPointMutate.cpp b/lgc/patch/PatchEntryPointMutate.cpp index bfc06831de..3deff7bf30 100644 --- a/lgc/patch/PatchEntryPointMutate.cpp +++ b/lgc/patch/PatchEntryPointMutate.cpp @@ -55,6 +55,7 @@ #include "lgc/patch/PatchEntryPointMutate.h" #include "ShaderMerger.h" +#include "compilerutils/CompilerUtils.h" #include "lgc/LgcContext.h" #include "lgc/LgcCpsDialect.h" #include "lgc/LgcDialect.h" @@ -847,7 +848,8 @@ Function *PatchEntryPointMutate::lowerCpsFunction(Function *func, ArrayRefgetType()); vsp = builder.CreateConstInBoundsGEP1_32(builder.getInt8Ty(), vsp, -alignTo(stateSize, ContinuationStackAlignment)); - Value *newState = builder.CreateLoad(state->getType(), vsp, "cps.state"); + auto *newState = builder.CreateLoad(state->getType(), vsp, "cps.state"); + CompilerUtils::setIsLastUseLoad(*newState); state->replaceAllUsesWith(newState); } vsp = builder.CreatePtrToInt(vsp, builder.getInt32Ty()); @@ -1101,7 +1103,7 @@ void PatchEntryPointMutate::gatherUserDataUsage(Module *module) { // offsets to calculate numbers of written primitives/dwords and update the counters. auto lastVertexStage = auto lastVertexStage = m_pipelineState->getLastVertexProcessingStage(); lastVertexStage = lastVertexStage == ShaderStage::CopyShader ? 
ShaderStage::Geometry : lastVertexStage; - getUserDataUsage(lastVertexStage)->usesStreamOutTable = true; + getUserDataUsage(lastVertexStage.value())->usesStreamOutTable = true; } } } diff --git a/lgc/patch/PatchInOutImportExport.cpp b/lgc/patch/PatchInOutImportExport.cpp index fb16894137..09027bf9fb 100644 --- a/lgc/patch/PatchInOutImportExport.cpp +++ b/lgc/patch/PatchInOutImportExport.cpp @@ -1082,6 +1082,14 @@ void PatchInOutImportExport::visitCallInst(CallInst &callInst) { Value *emitCounter = builder.CreateLoad(emitCounterTy, emitCounterPtr); emitCounter = builder.CreateAdd(emitCounter, builder.getInt32(1)); builder.CreateStore(emitCounter, emitCounterPtr); + + // Increment total emit vertex counter + if (m_pipelineState->getShaderModes()->getGeometryShaderMode().robustGsEmits) { + auto totalEmitCounterPtr = m_pipelineSysValues.get(m_entryPoint)->getTotalEmitCounterPtr(); + Value *totalEmitCounter = builder.CreateLoad(builder.getInt32Ty(), totalEmitCounterPtr); + totalEmitCounter = builder.CreateAdd(totalEmitCounter, builder.getInt32(1)); + builder.CreateStore(totalEmitCounter, totalEmitCounterPtr); + } } } } @@ -1101,7 +1109,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { // Whether this shader stage has to use "exp" instructions to export outputs const bool useExpInst = ((m_shaderStage == ShaderStage::Vertex || m_shaderStage == ShaderStage::TessEval || m_shaderStage == ShaderStage::CopyShader) && - (nextStage == ShaderStage::Invalid || nextStage == ShaderStage::Fragment)); + (!nextStage || nextStage == ShaderStage::Fragment)); BuilderBase builder(&retInst); @@ -1329,7 +1337,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { } // NOTE: We have to export gl_ClipDistance[] or gl_CullDistancep[] via generic outputs as well. 
- assert(nextStage == ShaderStage::Invalid || nextStage == ShaderStage::Fragment); + assert(!nextStage || nextStage == ShaderStage::Fragment); bool hasClipCullExport = true; if (nextStage == ShaderStage::Fragment) { @@ -1385,7 +1393,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { bool hasPrimitiveIdExport = false; if (nextStage == ShaderStage::Fragment) { hasPrimitiveIdExport = nextBuiltInUsage.primitiveId; - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { if (m_shaderStage == ShaderStage::CopyShader) { hasPrimitiveIdExport = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->builtInUsage.gs.primitiveId; @@ -1466,7 +1474,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { bool hasViewportIndexExport = true; if (nextStage == ShaderStage::Fragment) { hasViewportIndexExport = nextBuiltInUsage.viewportIndex; - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { hasViewportIndexExport = false; } @@ -1485,7 +1493,7 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { bool hasLayerExport = true; if (nextStage == ShaderStage::Fragment) { hasLayerExport = nextBuiltInUsage.layer; - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { hasLayerExport = false; } @@ -1533,8 +1541,8 @@ void PatchInOutImportExport::visitReturnInst(ReturnInst &retInst) { } else if (m_shaderStage == ShaderStage::Geometry) { // NOTE: Per programming guide, we should do a "s_waitcnt 0,0,0 + s_waitcnt_vscnt 0" before issuing a "done", so // we use fence release to generate s_waitcnt vmcnt lgkmcnt/s_waitcnt_vscnt before s_sendmsg(MSG_GS_DONE) - SyncScope::ID scope = - m_pipelineState->isGsOnChip() ? m_context->getOrInsertSyncScopeID("workgroup") : SyncScope::System; + StringRef scopeName = m_pipelineState->isGsOnChip() ? 
"workgroup" : "agent"; + SyncScope::ID scope = m_context->getOrInsertSyncScopeID(scopeName); builder.CreateFence(AtomicOrdering::Release, scope); auto &entryArgIdxs = m_pipelineState->getShaderInterfaceData(ShaderStage::Geometry)->entryArgIdxs.gs; @@ -4001,6 +4009,18 @@ void PatchInOutImportExport::storeValueToGsVsRing(Value *storeValue, unsigned lo auto ringOffset = calcGsVsRingOffsetForOutput(location, compIdx, streamId, emitCounter, gsVsOffset, builder); + IRBuilder<>::InsertPointGuard guard(builder); + + // Skip GS-VS ring write if the emit is invalid + const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); + if (geometryMode.robustGsEmits) { + auto totalEmitCounterPtr = m_pipelineSysValues.get(m_entryPoint)->getTotalEmitCounterPtr(); + auto totalEmitCounter = builder.CreateLoad(builder.getInt32Ty(), totalEmitCounterPtr); + // validEmit = totalEmitCounter < outputVertices + auto validEmit = builder.CreateICmpULT(totalEmitCounter, builder.getInt32(geometryMode.outputVertices)); + builder.CreateIf(validEmit, false); + } + if (m_pipelineState->isGsOnChip()) { auto lds = Patch::getLdsVariable(m_pipelineState, m_entryPoint); Value *storePtr = builder.CreateGEP(builder.getInt32Ty(), lds, ringOffset); @@ -4048,7 +4068,6 @@ Value *PatchInOutImportExport::calcEsGsRingOffsetForOutput(unsigned location, un BuilderBase &builder) { // ES -> GS ring is always on-chip on GFX10+ // ringOffset = esGsOffset + threadId * esGsRingItemSize + location * 4 + compIdx - assert(m_pipelineState->hasShaderStage(ShaderStage::Geometry)); const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.calcFactor; @@ -4071,12 +4090,37 @@ Value *PatchInOutImportExport::calcEsGsRingOffsetForOutput(unsigned location, un // @param builder : the builder to use Value *PatchInOutImportExport::calcEsGsRingOffsetForInput(unsigned location, unsigned compIdx, Value *vertexIdx, BuilderBase &builder) { + // ES -> GS ring is always 
on-chip on GFX10+ + assert(m_pipelineState->hasShaderStage(ShaderStage::Geometry)); + const auto &calcFactor = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry)->inOutUsage.gs.calcFactor; + auto esGsOffsets = m_pipelineSysValues.get(m_entryPoint)->getEsGsOffsets(); + const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); - // ES -> GS ring is always on-chip on GFX10+ - Value *vertexOffset = builder.CreateExtractElement(esGsOffsets, vertexIdx); + Value *vertexOffset = nullptr; + if (geometryMode.inputPrimitive == InputPrimitives::Patch) { + assert(geometryMode.controlPoints > 0); // Must have control points + + // NOTE: If the input primitive is a patch, the calculation of vertex offset is different from other input primitive + // types as follow: + // + // vertexOffset = esGsOffset0 + vertexIdx * esGsRingItemSize + // + // The esGsOffset0 is the starting offset of control points for each patch with such HW layout: + // + // +-----------------+-----------------+-----+-------------------+ + // | Control Point 0 | Control Point 1 | ... 
| Control Point N-1 | + // +-----------------+-----------------+-----+-------------------+ + // |<-------------------------- Patch -------------------------->| + // + vertexOffset = builder.CreateMul(vertexIdx, builder.getInt32(calcFactor.esGsRingItemSize)); + vertexOffset = builder.CreateAdd(builder.CreateExtractElement(esGsOffsets, static_cast(0)), vertexOffset); + } else { + // vertexOffset = esGsOffsets[vertexIdx] (vertexIdx < 6) + vertexOffset = builder.CreateExtractElement(esGsOffsets, vertexIdx); + } - // ringOffset = vertexOffset[N] + (location * 4 + compIdx); + // ringOffset = vertexOffset + (location * 4 + compIdx); Value *ringOffset = builder.CreateAdd(vertexOffset, builder.getInt32(location * 4 + compIdx)); return ringOffset; } @@ -4620,7 +4664,7 @@ void PatchInOutImportExport::addExportInstForGenericOutput(Value *output, unsign const auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); const bool useExpInst = ((m_shaderStage == ShaderStage::Vertex || m_shaderStage == ShaderStage::TessEval || m_shaderStage == ShaderStage::CopyShader) && - (nextStage == ShaderStage::Invalid || nextStage == ShaderStage::Fragment)); + (!nextStage || nextStage == ShaderStage::Fragment)); assert(useExpInst); (void(useExpInst)); // unused diff --git a/lgc/patch/PatchReadFirstLane.cpp b/lgc/patch/PatchReadFirstLane.cpp index ee9c388585..301e0c46a0 100644 --- a/lgc/patch/PatchReadFirstLane.cpp +++ b/lgc/patch/PatchReadFirstLane.cpp @@ -361,7 +361,12 @@ void ReadFirstLaneOptimizer::collectAssumeUniforms(BasicBlock *block, while (!candidates.empty()) { Instruction *candidate = candidates.pop_back_val(); - + if (auto intrinsic = dyn_cast(candidate)) { + // Don't lift readfirstlane that is manually added after permlane64 or permlanex16 in subgroupClusteredReduction + if (intrinsic->getIntrinsicID() == Intrinsic::amdgcn_permlane64 || + intrinsic->getIntrinsicID() == Intrinsic::amdgcn_permlanex16) + continue; + } if (isAllUsersAssumedUniform(candidate)) 
tryPropagate(candidate, false); } diff --git a/lgc/patch/PatchResourceCollect.cpp b/lgc/patch/PatchResourceCollect.cpp index d9a091bda7..4d8f42f2b6 100644 --- a/lgc/patch/PatchResourceCollect.cpp +++ b/lgc/patch/PatchResourceCollect.cpp @@ -123,7 +123,7 @@ PreservedAnalyses PatchResourceCollect::run(Module &module, ModuleAnalysisManage if (func.isDeclaration()) continue; auto stage = getShaderStage(&func); - if (!m_shaderStage || &func == pipelineShaders.getEntryPoint(m_shaderStage)) + if (!stage || &func == pipelineShaders.getEntryPoint(stage.value())) continue; m_shaderStage = stage.value(); m_entryPoint = &func; @@ -462,6 +462,9 @@ bool PatchResourceCollect::checkGsOnChipValidity() { useAdjacency = true; inVertsPerPrim = 6; break; + case InputPrimitives::Patch: + inVertsPerPrim = geometryMode.controlPoints; + break; default: llvm_unreachable("Unexpected input primitive type!"); break; @@ -1640,7 +1643,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { auto &inOutUsage = resUsage->inOutUsage; const auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); - auto nextResUsage = nextStage != ShaderStage::Invalid ? m_pipelineState->getShaderResourceUsage(nextStage) : nullptr; + auto nextResUsage = nextStage ? 
m_pipelineState->getShaderResourceUsage(nextStage.value()) : nullptr; assert(inOutUsage.builtInInputLocMap.empty()); // Should be empty assert(inOutUsage.builtInOutputLocMap.empty()); @@ -1812,7 +1815,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { } builtInUsage.vs.primitiveShadingRate = false; - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { // VS only if (builtInUsage.vs.clipDistance > 0 || builtInUsage.vs.cullDistance > 0) { unsigned mapLoc = availOutMapLoc++; @@ -1975,7 +1978,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { if (inOutUsage.builtInOutputLocMap.find(BuiltInCullDistance) != inOutUsage.builtInOutputLocMap.end() && inOutUsage.builtInOutputLocMap[BuiltInCullDistance] == InvalidValue) inOutUsage.builtInOutputLocMap[BuiltInCullDistance] = availOutMapLoc++; - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { // TCS only if (builtInUsage.tcs.position) inOutUsage.builtInOutputLocMap[BuiltInPosition] = availOutMapLoc++; @@ -2033,7 +2036,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { // incorrectness of location assignment during builtin-to-generic mapping. 
const auto prevStage = m_pipelineState->getPrevShaderStage(m_shaderStage); if (prevStage == ShaderStage::TessControl) { - const auto &prevBuiltInUsage = m_pipelineState->getShaderResourceUsage(prevStage)->builtInUsage.tcs; + const auto &prevBuiltInUsage = m_pipelineState->getShaderResourceUsage(prevStage.value())->builtInUsage.tcs; clipDistanceCount = std::max(clipDistanceCount, prevBuiltInUsage.clipDistance); } @@ -2047,7 +2050,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { const auto prevStage = m_pipelineState->getPrevShaderStage(m_shaderStage); if (prevStage == ShaderStage::TessControl) { - const auto &prevBuiltInUsage = m_pipelineState->getShaderResourceUsage(prevStage)->builtInUsage.tcs; + const auto &prevBuiltInUsage = m_pipelineState->getShaderResourceUsage(prevStage.value())->builtInUsage.tcs; cullDistanceCount = std::max(cullDistanceCount, prevBuiltInUsage.clipDistance); } @@ -2158,7 +2161,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { } else { builtInUsage.tes.viewportIndex = 0; } - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { // TES only if (builtInUsage.tes.clipDistance > 0 || builtInUsage.tes.cullDistance > 0) { unsigned mapLoc = availOutMapLoc++; @@ -2282,7 +2285,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInViewportIndex]; builtInOutLocs[BuiltInViewportIndex] = mapLoc; } - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { // GS only unsigned availOutMapLoc = inOutUsage.outputLocInfoMap.size(); // Reset available location @@ -2368,7 +2371,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { const unsigned mapLoc = nextInOutUsage.builtInInputLocMap[BuiltInCullDistance]; inOutUsage.mesh.vertexBuiltInExportSlots[BuiltInCullDistance] = mapLoc; } - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { // Mesh shader only unsigned availExportLoc = 
inOutUsage.outputMapLocCount; @@ -2416,7 +2419,7 @@ void PatchResourceCollect::mapBuiltInToGenericInOut() { const unsigned mapLoc = nextInOutUsage.perPrimitiveBuiltInInputLocMap[BuiltInViewportIndex]; inOutUsage.mesh.primitiveBuiltInExportSlots[BuiltInViewportIndex] = mapLoc; } - } else if (nextStage == ShaderStage::Invalid) { + } else if (!nextStage) { // Mesh shader only unsigned availPerPrimitiveExportLoc = inOutUsage.perPrimitiveOutputMapLocCount; @@ -2682,7 +2685,7 @@ void PatchResourceCollect::updateInputLocInfoMapWithUnpack() { auto preStage = m_pipelineState->getPrevShaderStage(m_shaderStage); if (preStage == ShaderStage::TessControl || preStage == ShaderStage::Mesh) { if (!inputLocInfoMap.empty()) { - auto &outputLocInfoMap = m_pipelineState->getShaderResourceUsage(preStage)->inOutUsage.outputLocInfoMap; + auto &outputLocInfoMap = m_pipelineState->getShaderResourceUsage(preStage.value())->inOutUsage.outputLocInfoMap; for (auto &infoPair : outputLocInfoMap) { if (infoPair.second != InvalidValue) { inputLocInfoMap[infoPair.first] = InvalidValue; @@ -2692,7 +2695,8 @@ void PatchResourceCollect::updateInputLocInfoMapWithUnpack() { } auto &perPatchInLocMap = inOutUsage.perPatchInputLocMap; if (!perPatchInLocMap.empty()) { - auto &perPatchOutLocMap = m_pipelineState->getShaderResourceUsage(preStage)->inOutUsage.perPatchOutputLocMap; + auto &perPatchOutLocMap = + m_pipelineState->getShaderResourceUsage(preStage.value())->inOutUsage.perPatchOutputLocMap; for (auto &locPair : perPatchOutLocMap) { if (locPair.second != InvalidValue) { perPatchInLocMap[locPair.first] = InvalidValue; @@ -2756,10 +2760,10 @@ void PatchResourceCollect::updateInputLocInfoMapWithUnpack() { // ===================================================================================================================== // Clear unused output from outputLocInfoMap, perPatchOutputLocMap, and perPrimitiveOutputLocMap void PatchResourceCollect::clearUnusedOutput() { - ShaderStageEnum nextStage = 
m_pipelineState->getNextShaderStage(m_shaderStage); + auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); auto &inOutUsage = m_pipelineState->getShaderResourceUsage(m_shaderStage)->inOutUsage; auto &outputLocInfoMap = inOutUsage.outputLocInfoMap; - if (nextStage != ShaderStage::Invalid) { + if (nextStage) { // Collect the locations of TCS's imported outputs DenseSet importOutputLocs; if (m_shaderStage == ShaderStage::TessControl) { @@ -2784,7 +2788,7 @@ void PatchResourceCollect::clearUnusedOutput() { // Do normal input/output matching SmallVector unusedLocInfos; - auto nextResUsage = m_pipelineState->getShaderResourceUsage(nextStage); + auto nextResUsage = m_pipelineState->getShaderResourceUsage(nextStage.value()); const auto &nextInLocInfoMap = nextResUsage->inOutUsage.inputLocInfoMap; for (auto &locInfoPair : outputLocInfoMap) { @@ -2899,8 +2903,9 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { // If we don't have to keep the locations and the next stage is valid, try to get location map of the outputs from // corresponding inputs of next stage. const bool keepLocation = m_shaderStage == ShaderStage::Geometry && !canChangeOutputLocationsForGs(); - if (!keepLocation && nextStage != ShaderStage::Invalid) { - auto &nextStageInputLocInfoMap = m_pipelineState->getShaderResourceUsage(nextStage)->inOutUsage.inputLocInfoMap; + if (!keepLocation && nextStage) { + auto &nextStageInputLocInfoMap = + m_pipelineState->getShaderResourceUsage(nextStage.value())->inOutUsage.inputLocInfoMap; for (auto &locInfoPair : outputLocInfoMap) { const auto &locationInfo = locInfoPair.first; auto &newLocationInfo = locInfoPair.second; @@ -3000,9 +3005,9 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { assert(m_shaderStage == ShaderStage::TessControl); // If the next stage is valid, try to get location map of the outputs from corresponding inputs of next stage. 
- if (nextStage != ShaderStage::Invalid) { + if (nextStage) { auto &nextStagePerPatchInputLocInfoMap = - m_pipelineState->getShaderResourceUsage(nextStage)->inOutUsage.perPatchInputLocMap; + m_pipelineState->getShaderResourceUsage(nextStage.value())->inOutUsage.perPatchInputLocMap; for (auto &locPair : perPatchOutputLocMap) { if (locPair.second != InvalidValue) continue; // Skip mapped locations @@ -3046,9 +3051,9 @@ void PatchResourceCollect::updateOutputLocInfoMapWithUnpack() { assert(m_shaderStage == ShaderStage::Mesh); // If the next stage is valid, try to get location map of the outputs from corresponding inputs of next stage. - if (nextStage != ShaderStage::Invalid) { + if (nextStage) { auto &nextStagePerPrimitiveInputLocMap = - m_pipelineState->getShaderResourceUsage(nextStage)->inOutUsage.perPrimitiveInputLocMap; + m_pipelineState->getShaderResourceUsage(nextStage.value())->inOutUsage.perPrimitiveInputLocMap; for (auto &locPair : perPrimitiveOutputLocMap) { if (locPair.second != InvalidValue) continue; // Skip mapped locations @@ -3096,7 +3101,7 @@ bool PatchResourceCollect::canChangeOutputLocationsForGs() { return true; if (m_pipelineState->getPalMetadata()->haveFsInputMappings()) return true; - if (m_pipelineState->getNextShaderStage(ShaderStage::Geometry) != ShaderStage::Invalid) + if (m_pipelineState->getNextShaderStage(ShaderStage::Geometry)) return true; return false; } @@ -3157,8 +3162,8 @@ void PatchResourceCollect::updateOutputLocInfoMapWithPack() { assert(m_shaderStage == ShaderStage::Vertex || m_shaderStage == ShaderStage::TessEval || m_shaderStage == ShaderStage::Geometry); auto nextStage = m_pipelineState->getNextShaderStage(m_shaderStage); - assert(nextStage != ShaderStage::Invalid); - auto &nextStageInputLocInfoMap = m_pipelineState->getShaderResourceUsage(nextStage)->inOutUsage.inputLocInfoMap; + auto &nextStageInputLocInfoMap = + m_pipelineState->getShaderResourceUsage(nextStage.value())->inOutUsage.inputLocInfoMap; // Remove unused outputs 
and update the output map if (m_shaderStage != m_pipelineState->getLastVertexProcessingStage()) { @@ -3707,7 +3712,7 @@ void PatchResourceCollect::clearUndefinedOutput() { for (auto call : candidateCalls) { // For unlinked case, we should keep the location info map unchanged. - if (m_pipelineState->getNextShaderStage(m_shaderStage) != ShaderStage::Invalid) { + if (m_pipelineState->getNextShaderStage(m_shaderStage)) { // Remove the output location info if it exists unsigned index = m_shaderStage == ShaderStage::Mesh ? 2 : 1; unsigned component = cast(call->getArgOperand(index))->getZExtValue(); diff --git a/lgc/patch/RegisterMetadataBuilder.cpp b/lgc/patch/RegisterMetadataBuilder.cpp index 6042c4313d..8e452560e8 100644 --- a/lgc/patch/RegisterMetadataBuilder.cpp +++ b/lgc/patch/RegisterMetadataBuilder.cpp @@ -129,9 +129,9 @@ void RegisterMetadataBuilder::buildPalMetadata() { if (hwStageMask & (Util::Abi::HwShaderGs | Util::Abi::HwShaderVs)) buildPaSpecificRegisters(); - if (lastVertexProcessingStage != ShaderStage::Invalid && m_pipelineState->isUnlinked()) { + if (lastVertexProcessingStage && m_pipelineState->isUnlinked()) { // Fill ".preraster_output_semantic" - auto resUsage = m_pipelineState->getShaderResourceUsage(lastVertexProcessingStage); + auto resUsage = m_pipelineState->getShaderResourceUsage(lastVertexProcessingStage.value()); auto &outputLocInfoMap = resUsage->inOutUsage.outputLocInfoMap; auto &builtInOutputLocMap = resUsage->inOutUsage.builtInOutputLocMap; // Collect semantic info for generic input and builtIns {gl_ClipDistance, gl_CulDistance, gl_Layer, @@ -200,6 +200,8 @@ void RegisterMetadataBuilder::buildLsHsRegisters() { lsVgprCompCnt = 3; // Enable all LS VGPRs (LS VGPR2 - VGPR5) else lsVgprCompCnt = 1; // Must enable relative vertex ID (LS VGPR2 and VGPR3) + } else { + llvm_unreachable("Not implemented!"); } getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::LsVgprCompCnt] = lsVgprCompCnt; @@ -226,6 +228,7 @@ void 
RegisterMetadataBuilder::buildEsGsRegisters() { const auto gsResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::Geometry); const auto &gsBuiltInUsage = gsResUsage->builtInUsage.gs; const auto &gsInOutUsage = gsResUsage->inOutUsage; + const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); const auto &calcFactor = gsInOutUsage.gs.calcFactor; const auto tesResUsage = m_pipelineState->getShaderResourceUsage(ShaderStage::TessEval); const auto &tesBuiltInUsage = tesResUsage->builtInUsage.tes; @@ -233,11 +236,12 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { // ES_VGPR_COMP_CNT in SPI_SHADER_PGM_RSRC2_GS unsigned gsVgprCompCnt = 0; - if (calcFactor.inputVertices > 4 || gsBuiltInUsage.invocationId) + if ((calcFactor.inputVertices > 4 && geometryMode.inputPrimitive != InputPrimitives::Patch) || + gsBuiltInUsage.invocationId) gsVgprCompCnt = 3; // Enable vtx4/vtx5 offset (GS VGPR3) or GS instance ID (GS VGPR4) else if (gsBuiltInUsage.primitiveIdIn) gsVgprCompCnt = 2; // Enable primitive ID (GS VGPR2) - else if (calcFactor.inputVertices > 2) + else if (calcFactor.inputVertices > 2 && geometryMode.inputPrimitive != InputPrimitives::Patch) gsVgprCompCnt = 1; // Enable vtx2/vtx3 offset (GS VGPR1) getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::GsVgprCompCnt] = gsVgprCompCnt; @@ -257,7 +261,6 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { getHwShaderNode(Util::Abi::HardwareStage::Gs)[Util::Abi::HardwareStageMetadataKey::OffchipLdsEn] = hasTs; // VGT_GS_MAX_VERT_OUT - const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); unsigned maxVertOut = std::max(1u, static_cast(geometryMode.outputVertices)); getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VgtGsMaxVertOut] = maxVertOut; @@ -345,6 +348,13 @@ void RegisterMetadataBuilder::buildEsGsRegisters() { // VGT_ESGS_RING_ITEMSIZE getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VgtEsgsRingItemsize] = 
calcFactor.esGsRingItemSize; + // VGT_LS_HS_CONFIG + if (geometryMode.inputPrimitive == InputPrimitives::Patch) { + assert(geometryMode.controlPoints > 0); + auto vgtLsHsConfig = getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VgtLsHsConfig].getMap(true); + vgtLsHsConfig[Util::Abi::VgtLsHsConfigMetadataKey::HsNumInputCp] = geometryMode.controlPoints; + } + // GE_MAX_OUTPUT_PER_SUBGROUP and VGT_GS_MAX_PRIMS_PER_SUBGROUP const unsigned maxPrimsPerSubgroup = std::min(gsInstPrimsInSubgrp * maxVertOut, MaxGsThreadsPerSubgroup); getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::MaxVertsPerSubgroup] = maxPrimsPerSubgroup; @@ -380,11 +390,12 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { unsigned gsVgprCompCnt = 0; if (m_gfxIp.major <= 11) { if (m_hasGs) { - if (calcFactor.inputVertices > 4 || gsBuiltInUsage.invocationId) + if ((calcFactor.inputVertices > 4 && geometryMode.inputPrimitive != InputPrimitives::Patch) || + gsBuiltInUsage.invocationId) gsVgprCompCnt = 3; // Enable vtx4/vtx5 offset (GS VGPR3) or GS instance ID (GS VGPR4) else if (gsBuiltInUsage.primitiveIdIn) gsVgprCompCnt = 2; // Enable primitive ID (GS VGPR2) - else if (calcFactor.inputVertices > 2) + else if (calcFactor.inputVertices > 2 && geometryMode.inputPrimitive != InputPrimitives::Patch) gsVgprCompCnt = 1; // Enable vtx2/vtx3 offset (GS VGPR1) } else if (m_hasVs) { // NOTE: When GS is absent, only those VGPRs are required: vtx0/vtx1 offset, vtx2/vtx3 offset, @@ -584,6 +595,13 @@ void RegisterMetadataBuilder::buildPrimShaderRegisters() { (m_hasGs ? 
calcFactor.esGsRingItemSize : 1); } + // VGT_LS_HS_CONFIG + if (geometryMode.inputPrimitive == InputPrimitives::Patch) { + assert(geometryMode.controlPoints > 0); + auto vgtLsHsConfig = getGraphicsRegNode()[Util::Abi::GraphicsRegisterMetadataKey::VgtLsHsConfig].getMap(true); + vgtLsHsConfig[Util::Abi::VgtLsHsConfigMetadataKey::HsNumInputCp] = geometryMode.controlPoints; + } + const auto nggControl = m_pipelineState->getNggControl(); assert(nggControl->enableNgg); if (!nggControl->passthroughMode) { @@ -1285,7 +1303,7 @@ void RegisterMetadataBuilder::buildPaSpecificRegisters() { // On 10.3+ all auxiliary position exports are optimized, not just the misc exports. if (m_gfxIp >= GfxIpVersion{10, 3}) - paClClipCntl[Util::Abi::PaClVsOutCntlMetadataKey::VsOutMiscSideBusEna] = true; + paClVsOutCntl[Util::Abi::PaClVsOutCntlMetadataKey::VsOutMiscSideBusEna] = true; } // PA_CL_VTE_CNTL @@ -1385,7 +1403,7 @@ void RegisterMetadataBuilder::setVgtShaderStagesEn(unsigned hwStageMask) { ShaderStageEnum apiStage = ShaderStage::Vertex; if (m_hasGs || m_hasMesh) { apiStage = m_hasGs ? 
ShaderStage::Geometry : ShaderStage::Mesh; - vgtShaderStagesEn[Util::Abi::VgtShaderStagesEnMetadataKey::GsStageEn] = GS_STAGE_ON; + vgtShaderStagesEn[Util::Abi::VgtShaderStagesEnMetadataKey::GsStageEn] = true; } else if (m_hasTes) { apiStage = ShaderStage::TessEval; } diff --git a/lgc/patch/ShaderMerger.cpp b/lgc/patch/ShaderMerger.cpp index 4b6a4b8121..14b49e5dc1 100644 --- a/lgc/patch/ShaderMerger.cpp +++ b/lgc/patch/ShaderMerger.cpp @@ -728,10 +728,12 @@ Function *ShaderMerger::generateEsGsEntryPoint(Function *esEntryPoint, Function ArrayRef vgprArgs(args.begin() + NumSpecialSgprInputs + 1, args.end()); // GS VGPRs + const auto &geometryMode = m_pipelineState->getShaderModes()->getGeometryShaderMode(); + Value *esGsOffsets01 = vgprArgs[0]; Value *esGsOffsets23 = PoisonValue::get(builder.getInt32Ty()); - if (calcFactor.inputVertices > 2) { + if (calcFactor.inputVertices > 2 && geometryMode.inputPrimitive != InputPrimitives::Patch) { // NOTE: ES to GS offset (vertex 2 and 3) is valid once the primitive type has more than 2 vertices. esGsOffsets23 = vgprArgs[1]; } @@ -740,7 +742,7 @@ Function *ShaderMerger::generateEsGsEntryPoint(Function *esEntryPoint, Function Value *invocationId = vgprArgs[3]; Value *esGsOffsets45 = PoisonValue::get(builder.getInt32Ty()); - if (calcFactor.inputVertices > 4) { + if (calcFactor.inputVertices > 4 && geometryMode.inputPrimitive != InputPrimitives::Patch) { // NOTE: ES to GS offset (vertex 4 and 5) is valid once the primitive type has more than 4 vertices. 
esGsOffsets45 = vgprArgs[4]; } diff --git a/lgc/patch/SystemValues.cpp b/lgc/patch/SystemValues.cpp index 0deecd5e06..1f8396c9f3 100644 --- a/lgc/patch/SystemValues.cpp +++ b/lgc/patch/SystemValues.cpp @@ -240,6 +240,7 @@ Value *ShaderSystemValues::getEsGsOffsets() { auto insertPos = &*m_entryPoint->front().getFirstNonPHIOrDbgOrAlloca(); auto intfData = m_pipelineState->getShaderInterfaceData(m_shaderStage); + // TODO: We should only insert those offsets required by the specified input primitive. m_esGsOffsets = PoisonValue::get(FixedVectorType::get(Type::getInt32Ty(*m_context), 6)); for (unsigned i = 0; i < InterfaceData::MaxEsGsOffsetCount; ++i) { auto esGsOffset = @@ -320,8 +321,6 @@ std::pair> ShaderSystemValues::getEmitCounterPtr() { assert(m_shaderStage == ShaderStage::Geometry); auto *emitCounterTy = Type::getInt32Ty(*m_context); if (m_emitCounterPtrs.empty()) { - // TODO: We should only insert those offsets required by the specified input primitive. - // Setup GS emit vertex counter auto &dataLayout = m_entryPoint->getParent()->getDataLayout(); auto insertPos = &*m_entryPoint->front().getFirstNonPHIOrDbgOrAlloca(); @@ -334,6 +333,21 @@ std::pair> ShaderSystemValues::getEmitCounterPtr() { return std::make_pair(emitCounterTy, ArrayRef(m_emitCounterPtrs)); } +// ===================================================================================================================== +// Get pointer to total emit counter (GS) +Value *ShaderSystemValues::getTotalEmitCounterPtr() { + assert(m_shaderStage == ShaderStage::Geometry); + assert(m_pipelineState->getShaderModes()->getGeometryShaderMode().robustGsEmits); // Must enable robust GS emits + if (!m_totalEmitCounterPtr) { + // Setup GS total emit vertex counter + BuilderBase builder(&*m_entryPoint->front().getFirstNonPHIOrDbgOrAlloca()); + + m_totalEmitCounterPtr = builder.CreateAlloca(builder.getInt32Ty()); + builder.CreateStore(builder.getInt32(0), m_totalEmitCounterPtr); + } + return m_totalEmitCounterPtr; +} + 
// ===================================================================================================================== // Get internal global table pointer as pointer to i8. Instruction *ShaderSystemValues::getInternalGlobalTablePtr() { diff --git a/lgc/patch/VertexFetch.cpp b/lgc/patch/VertexFetch.cpp index c8cf1f7e44..70c618aeff 100644 --- a/lgc/patch/VertexFetch.cpp +++ b/lgc/patch/VertexFetch.cpp @@ -118,9 +118,10 @@ class VertexFetchImpl : public VertexFetch { Value *loadVertexBufferDescriptor(unsigned binding, BuilderImpl &builderImpl); - void addVertexFetchInst(Value *vbDesc, Value *vbIndex, Value *srdStride, unsigned numChannels, unsigned offset, - unsigned dfmt, unsigned nfmt, unsigned inputCompBytes, unsigned fetchCompBytes, bool isSigned, - bool isPacked, bool fetchInByte, BuilderImpl &builderImpl, Value **ppFetch) const; + void addVertexFetchInst(Value *vbDesc, Value *vbIndex, Value *srdStride, Type *inputTy, unsigned numChannels, + unsigned offset, unsigned dfmt, unsigned nfmt, unsigned inputCompBytes, + unsigned fetchCompBytes, bool isSigned, bool isPacked, bool fetchInByte, + BuilderImpl &builderImpl, Value **ppFetch) const; bool needPostShuffle(const VertexInputDescription *inputDesc, std::vector &shuffleMask) const; @@ -715,8 +716,8 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, m_instanceIndex = ShaderInputs::getInstanceIndex(builder, *m_lgcContext); } - // Get the vertex buffer table pointer as pointer to v4i32 descriptor. 
- Type *vbDescTy = FixedVectorType::get(Type::getInt32Ty(*m_context), 4); + Type *vbDescTy = nullptr; + { vbDescTy = FixedVectorType::get(Type::getInt32Ty(*m_context), 4); } if (!m_vertexBufTablePtr) { IRBuilderBase::InsertPointGuard ipg(builder); builder.SetInsertPointPastAllocas(inst->getFunction()); @@ -835,13 +836,16 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, assert(bitWidth == 8 || bitWidth == 16 || bitWidth == 32 || bitWidth == 64); Intrinsic::ID instId = Intrinsic::amdgcn_struct_buffer_load_format; - if (m_useSoftwareVertexBufferDescriptors) { - instId = Intrinsic::amdgcn_raw_buffer_load_format; - auto srdStride = builder.CreateExtractElement(vbDesc, 3); - byteOffset = builder.CreateAdd(builder.CreateMul(vbIndex, srdStride), byteOffset); + + { + if (m_useSoftwareVertexBufferDescriptors) { + instId = Intrinsic::amdgcn_raw_buffer_load_format; + auto srdStride = builder.CreateExtractElement(vbDesc, 3); + byteOffset = builder.CreateAdd(builder.CreateMul(vbIndex, srdStride), byteOffset); + } + // Replace buffer format + vbDesc = builder.CreateInsertElement(vbDesc, bufferFormat, 3); } - // Replace buffer format - vbDesc = builder.CreateInsertElement(vbDesc, bufferFormat, 3); SmallVector args; args.push_back(vbDesc); @@ -849,7 +853,7 @@ Value *VertexFetchImpl::fetchVertex(InputImportGenericOp *inst, Value *descPtr, args.push_back(vbIndex); unsigned offsetIdx = args.size(); args.push_back(byteOffset); - args.push_back(builder.getInt32(0)); + { args.push_back(builder.getInt32(0)); } args.push_back(builder.getInt32(0)); if (disablePerCompFetch) { @@ -1251,9 +1255,9 @@ Value *VertexFetchImpl::fetchVertex(Type *inputTy, const VertexInputDescription // After back-end optimization, intrinsics may be combined to fetch the whole vertex in generated ISA codes. // To make sure combination works, we need to keep tbuffer_load formats as same as possible when visit this function. 
// To avoid redundant extract and insert operation, we need to keep component bit width as same as input component. - addVertexFetchInst(vbDesc, vbIndex, srdStride, numChannels, description->offset, compFormatInfo->fetchDfmt, - description->nfmt, inputCompBytes, fetchCompBytes, numFormatInfo->isSigned, isPacked, fetchInByte, - builderImpl, &vertexFetch); + addVertexFetchInst(vbDesc, vbIndex, srdStride, inputCompTy, numChannels, description->offset, + compFormatInfo->fetchDfmt, description->nfmt, inputCompBytes, fetchCompBytes, + numFormatInfo->isSigned, isPacked, fetchInByte, builderImpl, &vertexFetch); // When do fetch in Byte, we need to emulate final results manually. postFetchEmulation(description, fetchInByte, inputCompBytes, numChannels, numFormatInfo, compFormatInfo, builderImpl, @@ -1611,10 +1615,10 @@ void VertexFetchImpl::postFetchEmulation(const VertexInputDescription *descripti // @param fetchInByte: Do fetch in Byte if the vertex attribute offset and stride are not aligned. 
// @param builderImpl : BuilderImpl to use to insert vertex fetch instructions // @param [out] ppFetch : Destination of vertex fetch -void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, Value *vbIndex, Value *srdStride, unsigned numChannels, - unsigned offset, unsigned dfmt, unsigned nfmt, unsigned inputCompBytes, - unsigned fetchCompBytes, bool isSigned, bool isPacked, bool fetchInByte, - BuilderImpl &builderImpl, Value **ppFetch) const { +void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, Value *vbIndex, Value *srdStride, Type *inputTy, + unsigned numChannels, unsigned offset, unsigned dfmt, unsigned nfmt, + unsigned inputCompBytes, unsigned fetchCompBytes, bool isSigned, bool isPacked, + bool fetchInByte, BuilderImpl &builderImpl, Value **ppFetch) const { Intrinsic::ID instId = Intrinsic::amdgcn_struct_tbuffer_load; Value *instOffset = builderImpl.getInt32(0); if (m_useSoftwareVertexBufferDescriptors) { @@ -1703,7 +1707,11 @@ void VertexFetchImpl::addVertexFetchInst(Value *vbDesc, Value *vbIndex, Value *s if (inputCompBytes < compBytes) compVal = builderImpl.CreateTrunc(compVal, inputCompTy); else if (inputCompBytes > compBytes) { - if (isSigned) + if (inputTy->isFloatTy() && nfmt == BufNumFormatFloat) { + compVal = builderImpl.CreateBitCast(compVal, builderImpl.getHalfTy()); + compVal = builderImpl.CreateFPExt(compVal, builderImpl.getFloatTy()); + compVal = builderImpl.CreateBitCast(compVal, inputCompTy); + } else if (isSigned) compVal = builderImpl.CreateSExt(compVal, inputCompTy); else compVal = builderImpl.CreateZExt(compVal, inputCompTy); @@ -1783,15 +1791,14 @@ std::pair VertexFetchImpl::convertSrdToOffsetMode(Value *vbDes // uint32 strideInBytes; // }; + GfxIpVersion gfxIp = m_lgcContext->getTargetInfo().getGfxIpVersion(); // Stride is from the third DWORD. 
auto srdStride = builder.CreateExtractElement(vbDesc, 3); - SqBufRsrcWord3 sqBufRsrcWord3 = {}; sqBufRsrcWord3.bits.dstSelX = BUF_DST_SEL_X; sqBufRsrcWord3.bits.dstSelY = BUF_DST_SEL_Y; sqBufRsrcWord3.bits.dstSelZ = BUF_DST_SEL_Z; sqBufRsrcWord3.bits.dstSelW = BUF_DST_SEL_W; - GfxIpVersion gfxIp = m_lgcContext->getTargetInfo().getGfxIpVersion(); if (gfxIp.major == 10) { sqBufRsrcWord3.gfx10.format = BUF_FORMAT_32_UINT; sqBufRsrcWord3.gfx10.resourceLevel = 1; diff --git a/lgc/state/PipelineState.cpp b/lgc/state/PipelineState.cpp index 5734ce2b56..a4379a9c8d 100644 --- a/lgc/state/PipelineState.cpp +++ b/lgc/state/PipelineState.cpp @@ -497,41 +497,43 @@ void PipelineState::readShaderStageMask(Module *module) { // ===================================================================================================================== // Get the last vertex processing shader stage in this pipeline, or ShaderStage::Invalid if none. -ShaderStageEnum PipelineState::getLastVertexProcessingStage() const { - if (m_stageMask.contains(ShaderStage::CopyShader)) - return ShaderStage::CopyShader; - if (m_stageMask.contains(ShaderStage::Geometry)) - return ShaderStage::Geometry; - if (m_stageMask.contains(ShaderStage::TessEval)) - return ShaderStage::TessEval; - if (m_stageMask.contains(ShaderStage::Vertex)) - return ShaderStage::Vertex; - return ShaderStage::Invalid; +std::optional PipelineState::getLastVertexProcessingStage() const { + for (auto stage : {ShaderStage::CopyShader, ShaderStage::Geometry, ShaderStage::TessEval, ShaderStage::Vertex}) { + if (m_stageMask.contains(stage)) + return stage; + } + return std::nullopt; } // ===================================================================================================================== // Gets the previous active shader stage in this pipeline // // @param shaderStage : Current shader stage -ShaderStageEnum PipelineState::getPrevShaderStage(ShaderStageEnum shaderStage) const { +std::optional 
PipelineState::getPrevShaderStage(ShaderStageEnum shaderStage) const { if (shaderStage == ShaderStage::Compute) - return ShaderStage::Invalid; + return std::nullopt; if (shaderStage == ShaderStage::CopyShader) { // Treat copy shader as part of geometry shader shaderStage = ShaderStage::Geometry; } - assert(shaderStage < ShaderStage::GfxCount); + std::optional prevStage; - ShaderStageEnum prevStage = ShaderStage::Invalid; + bool foundCurrent = false; + for (auto stage : llvm::reverse(ShaderStagesGraphics)) { + if (!foundCurrent) { + if (stage == shaderStage) + foundCurrent = true; + continue; + } - for (int stage = shaderStage - 1; stage >= 0; --stage) { - if (m_stageMask.contains(static_cast(stage))) { - prevStage = static_cast(stage); + if (m_stageMask.contains(stage)) { + prevStage = stage; break; } } + assert(foundCurrent); return prevStage; } @@ -540,28 +542,34 @@ ShaderStageEnum PipelineState::getPrevShaderStage(ShaderStageEnum shaderStage) c // Gets the next active shader stage in this pipeline // // @param shaderStage : Current shader stage -ShaderStageEnum PipelineState::getNextShaderStage(ShaderStageEnum shaderStage) const { +std::optional PipelineState::getNextShaderStage(ShaderStageEnum shaderStage) const { if (shaderStage == ShaderStage::Compute) - return ShaderStage::Invalid; + return std::nullopt; if (shaderStage == ShaderStage::CopyShader) { // Treat copy shader as part of geometry shader shaderStage = ShaderStage::Geometry; } - assert(shaderStage < ShaderStage::GfxCount); - - ShaderStageEnum nextStage = ShaderStage::Invalid; + std::optional nextStage; auto stageMask = m_stageMask; if (isPartPipeline()) stageMask |= ShaderStageMask(ShaderStage::Fragment); - for (unsigned stage = shaderStage + 1; stage < ShaderStage::GfxCount; ++stage) { - if (stageMask.contains(static_cast(stage))) { - nextStage = static_cast(stage); + bool foundCurrent = false; + for (auto stage : ShaderStagesGraphics) { + if (!foundCurrent) { + if (stage == shaderStage) + 
foundCurrent = true; + continue; + } + + if (stageMask.contains(stage)) { + nextStage = stage; break; } } + assert(foundCurrent); return nextStage; } @@ -1434,8 +1442,8 @@ void PipelineState::buildAbiHwShaderMap() { } else { if (hasGs) { auto preGsStage = getPrevShaderStage(ShaderStage::Geometry); - if (preGsStage != ShaderStage::Invalid) - m_abiHwShaderMap[preGsStage] = Util::Abi::HwShaderGs; + if (preGsStage.has_value()) + m_abiHwShaderMap[preGsStage.value()] = Util::Abi::HwShaderGs; } if (hasTcs) { m_abiHwShaderMap[ShaderStage::TessControl] = Util::Abi::HwShaderHs; @@ -1444,16 +1452,16 @@ void PipelineState::buildAbiHwShaderMap() { } auto lastVertexProcessingStage = getLastVertexProcessingStage(); - if (lastVertexProcessingStage != ShaderStage::Invalid) { + if (lastVertexProcessingStage.has_value()) { if (lastVertexProcessingStage == ShaderStage::CopyShader) lastVertexProcessingStage = ShaderStage::Geometry; if (isNggEnabled()) { - m_abiHwShaderMap[lastVertexProcessingStage] = Util::Abi::HwShaderGs; + m_abiHwShaderMap[lastVertexProcessingStage.value()] = Util::Abi::HwShaderGs; m_abiPipelineType = hasTs ? Util::Abi::PipelineType::NggTess : Util::Abi::PipelineType::Ngg; } else { - m_abiHwShaderMap[lastVertexProcessingStage] = Util::Abi::HwShaderVs; + m_abiHwShaderMap[lastVertexProcessingStage.value()] = Util::Abi::HwShaderVs; if (hasGs) - m_abiHwShaderMap[lastVertexProcessingStage] |= Util::Abi::HwShaderGs; + m_abiHwShaderMap[lastVertexProcessingStage.value()] |= Util::Abi::HwShaderGs; if (hasTs && hasGs) m_abiPipelineType = Util::Abi::PipelineType::GsTess; @@ -1688,7 +1696,7 @@ bool PipelineState::enableSwXfb() { auto lastVertexStage = getLastVertexProcessingStage(); lastVertexStage = lastVertexStage == ShaderStage::CopyShader ? 
ShaderStage::Geometry : lastVertexStage; - if (lastVertexStage == ShaderStage::Invalid) { + if (!lastVertexStage) { assert(isUnlinked()); // Unlinked fragment shader or part-pipeline return false; } @@ -1933,16 +1941,16 @@ void PipelineState::initializeInOutPackState() { // We are assuming that if any of the vertex processing, then the vertex processing stages are complete. For // example, if we see a vertex shader and geometry shader with no tessellation shaders, then we will assume we can // pack the vertex outputs and geometry inputs because no tessellation shader will be added later. - for (ShaderStageEnum stage : lgc::enumRange(ShaderStage::GfxCount)) { + for (auto stage : ShaderStagesGraphics) { if (!m_stageMask.contains(stage)) continue; if (stage == ShaderStage::TessEval) continue; - ShaderStageEnum preStage = getPrevShaderStage(stage); - if (preStage == ShaderStage::Invalid) + auto preStage = getPrevShaderStage(stage); + if (!preStage) continue; m_inputPackState[stage] = true; - m_outputPackState[preStage] = true; + m_outputPackState[*preStage] = true; } } } @@ -1952,12 +1960,12 @@ void PipelineState::initializeInOutPackState() { // // @param shaderStage : The given shader stage bool PipelineState::canPackInput(ShaderStageEnum shaderStage) { - ShaderStageEnum preStage = getPrevShaderStage(shaderStage); + auto preStage = getPrevShaderStage(shaderStage); // The input packable state of the current stage should match the output packable state of the previous stage, except // that the current stage has no previous and it is a null FS. 
- if (preStage != ShaderStage::Invalid && + if (preStage && !(shaderStage == ShaderStage::Fragment && getShaderResourceUsage(shaderStage)->inOutUsage.fs.isNullFs)) - assert(m_inputPackState[shaderStage] == m_outputPackState[preStage]); + assert(m_inputPackState[shaderStage] == m_outputPackState[preStage.value()]); return m_inputPackState[shaderStage]; } @@ -1966,25 +1974,30 @@ bool PipelineState::canPackInput(ShaderStageEnum shaderStage) { // // @param shaderStage : The given shader stage bool PipelineState::canPackOutput(ShaderStageEnum shaderStage) { - ShaderStageEnum nextStage = getNextShaderStage(shaderStage); + auto nextStage = getNextShaderStage(shaderStage); // The output packable state of the current stage should match the input packable state of the next stage, except that // the current stage has no next stage or a null FS. - if (nextStage != ShaderStage::Invalid && - !(nextStage == ShaderStage::Fragment && getShaderResourceUsage(nextStage)->inOutUsage.fs.isNullFs)) - assert(m_outputPackState[shaderStage] == m_inputPackState[nextStage]); + if (nextStage && !(nextStage == ShaderStage::Fragment && getShaderResourceUsage(*nextStage)->inOutUsage.fs.isNullFs)) + assert(m_outputPackState[shaderStage] == m_inputPackState[*nextStage]); return m_outputPackState[shaderStage]; } // ===================================================================================================================== // Get the count of vertices per primitive. For GS, the count is for output primitive. 
unsigned PipelineState::getVerticesPerPrimitive() { - if (hasShaderStage(ShaderStage::Geometry)) { - const auto &geometryMode = getShaderModes()->getGeometryShaderMode(); - switch (geometryMode.outputPrimitive) { + if (hasShaderStage(ShaderStage::Geometry) || hasShaderStage(ShaderStage::Mesh)) { + OutputPrimitives outputPrimitive = OutputPrimitives::Points; + if (hasShaderStage(ShaderStage::Geometry)) + outputPrimitive = getShaderModes()->getGeometryShaderMode().outputPrimitive; + else + outputPrimitive = getShaderModes()->getMeshShaderMode().outputPrimitive; + switch (outputPrimitive) { case OutputPrimitives::Points: return 1; + case OutputPrimitives::Lines: case OutputPrimitives::LineStrip: return 2; + case OutputPrimitives::Triangles: case OutputPrimitives::TriangleStrip: return 3; default: diff --git a/llpc/context/GfxRuntimeContext.cpp b/lgc/state/RuntimeContext.cpp similarity index 90% rename from llpc/context/GfxRuntimeContext.cpp rename to lgc/state/RuntimeContext.cpp index d7932edf6b..644b892748 100644 --- a/llpc/context/GfxRuntimeContext.cpp +++ b/lgc/state/RuntimeContext.cpp @@ -24,17 +24,15 @@ **********************************************************************************************************************/ /** *********************************************************************************************************************** - * @file GfxRuntimeContext.cpp - * @brief LLVMContext extension that stores a GfxRuntime library module + * @file RuntimeContext.cpp + * @brief LLVMContext extension that stores a Runtime library module *********************************************************************************************************************** */ -#include "GfxRuntimeContext.h" +#include "lgc/RuntimeContext.h" #include "llvm/IR/Module.h" using namespace llvm; using namespace lgc; GfxRuntimeContext::Key GfxRuntimeContext::theKey; - -GfxRuntimeContext::~GfxRuntimeContext() = default; diff --git a/lgc/state/ShaderStage.cpp 
b/lgc/state/ShaderStage.cpp index c7eb5d2f75..78b3868fea 100644 --- a/lgc/state/ShaderStage.cpp +++ b/lgc/state/ShaderStage.cpp @@ -82,8 +82,9 @@ void lgc::setShaderStage(GlobalObject *func, std::optional stag MDNode::get(func->getContext(), {ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(func->getContext()), stage.value()))}); func->setMetadata(mdKindId, stageMetaNode); - } else + } else { func->eraseMetadata(mdKindId); + } } // ===================================================================================================================== @@ -137,13 +138,28 @@ bool lgc::isShaderEntryPoint(const Function *func) { // // @param shaderStage : Shader stage const char *lgc::getShaderStageAbbreviation(ShaderStageEnum shaderStage) { - if (shaderStage == ShaderStage::CopyShader) + switch (shaderStage) { + case ShaderStage::Compute: + return "CS"; + case ShaderStage::Fragment: + return "FS"; + case ShaderStage::Vertex: + return "VS"; + case ShaderStage::Geometry: + return "GS"; + case ShaderStage::CopyShader: return "COPY"; - if (shaderStage > ShaderStage::Compute) - return "Bad"; - - static const char *ShaderStageAbbrs[] = {"TASK", "VS", "TCS", "TES", "GS", "MESH", "FS", "CS"}; - return ShaderStageAbbrs[static_cast(shaderStage)]; + case ShaderStage::TessControl: + return "TCS"; + case ShaderStage::TessEval: + return "TES"; + case ShaderStage::Task: + return "TASK"; + case ShaderStage::Mesh: + return "MESH"; + default: + llvm_unreachable("Unhandled ShaderStage"); + } } // ===================================================================================================================== diff --git a/lgc/test/CsLowerDebugPrintf.lgc b/lgc/test/CsLowerDebugPrintf.lgc index f4e21a52b0..d380f37da8 100644 --- a/lgc/test/CsLowerDebugPrintf.lgc +++ b/lgc/test/CsLowerDebugPrintf.lgc @@ -6,27 +6,29 @@ source_filename = "llpc_compute_8" target datalayout = 
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" target triple = "amdgcn--amdpal" -@str.1 = internal addrspace(4) constant [10 x i8] c"Output:%d\0A" +@0 = private unnamed_addr constant [11 x i8] c"Output:%d\0A\00", align 1 +@1 = private unnamed_addr constant [22 x i8] c"workgroup size:%f,%f\0A\00", align 1 ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !spirv.ExecutionModel !7 !lgc.shaderstage !8 { .entry: - %0 = call <3 x i32> @lgc.shader.input.WorkgroupId(i32 0) #1 + %0 = call <3 x i32> @lgc.shader.input.WorkgroupId(i32 0) #2 %1 = mul <3 x i32> %0, - %2 = call <3 x i32> @lgc.shader.input.LocalInvocationId(i32 47) #1 - %3 = insertelement <3 x i32> %2, i32 0, i64 1 - %4 = insertelement <3 x i32> %3, i32 0, i64 2 - %5 = call <3 x i32> @lgc.reconfigure.local.invocation.id(<3 x i32> %4, i32 0) #1 - %6 = add <3 x i32> %1, %5 - %__llpc_input_proxy_gl_GlobalInvocationID.0.vec.extract = extractelement <3 x i32> %6, i64 0 - %7 = call ptr addrspace(4) @lgc.descriptor.table.addr(i32 6, i32 6, i32 -1, i32 6, i32 -1) #1 - %8 = getelementptr i8, ptr addrspace(4) %7, i32 0 - %9 = load <4 x i32>, ptr addrspace(4) %8, align 16 - %10 = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> %9) - %11 = insertelement <2 x i32> poison, i32 0, i64 0 - %12 = insertelement <2 x i32> %11, i32 0, i64 1 - %13 = bitcast <2 x i32> %12 to i64 - call void (...) 
@lgc.debug.printf(ptr addrspace(7) %10, ptr addrspace(4) @str.1, i32 %__llpc_input_proxy_gl_GlobalInvocationID.0.vec.extract, i64 %13) + %2 = call i32 @lgc.shader.input.LocalInvocationId(i32 49) #2 + %3 = and i32 %2, 1023 + %4 = insertelement <3 x i32> poison, i32 %3, i64 0 + %5 = lshr i32 %2, 10 + %6 = and i32 %5, 1023 + %7 = insertelement <3 x i32> %4, i32 %6, i64 1 + %8 = lshr i32 %5, 10 + %9 = insertelement <3 x i32> %7, i32 %8, i64 2 + %10 = insertelement <3 x i32> %9, i32 0, i64 1 + %11 = insertelement <3 x i32> %10, i32 0, i64 2 + %12 = call <3 x i32> @lgc.reconfigure.local.invocation.id(<3 x i32> %11, i32 0) #2 + %13 = add <3 x i32> %1, %12 + %__llpc_input_proxy_gl_GlobalInvocationID.0.vec.extract = extractelement <3 x i32> %13, i64 0 + call void (...) @lgc.debug.printf(ptr nonnull @0, i32 %__llpc_input_proxy_gl_GlobalInvocationID.0.vec.extract) + call void (...) @lgc.debug.printf(ptr nonnull @1, double 1.000000e+00, double 1.000000e+00) ret void } @@ -70,42 +72,64 @@ attributes #2 = { nounwind willreturn memory(none) } !8 = !{i32 7} ; CHECK-LABEL: @lgc.shader.CS.main( ; CHECK-NEXT: .entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <3 x i32> @lgc.shader.input.WorkgroupId(i32 0) #[[ATTR1:[0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = mul <3 x i32> [[TMP0]], -; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @lgc.shader.input.LocalInvocationId(i32 47) #[[ATTR1]] -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <3 x i32> [[TMP2]], i32 0, i64 1 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <3 x i32> [[TMP3]], i32 0, i64 2 -; CHECK-NEXT: [[TMP5:%.*]] = call <3 x i32> @lgc.reconfigure.local.invocation.id(<3 x i32> [[TMP4]], i32 0) #[[ATTR1]] -; CHECK-NEXT: [[TMP6:%.*]] = add <3 x i32> [[TMP1]], [[TMP5]] -; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP6]], i64 0 -; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) @lgc.descriptor.table.addr(i32 6, i32 6, i32 -1, i32 6, i32 -1) #[[ATTR1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr 
i8, ptr addrspace(4) [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP8]], align 16 -; CHECK-NEXT: [[TMP10:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 0, i64 0 -; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP11]], i32 0, i64 1 -; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[TMP12]] to i64 -; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32 -; CHECK-NEXT: [[TMP15:%.*]] = lshr i64 [[TMP13]], 32 -; CHECK-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32 -; CHECK-NEXT: [[TMP17:%.*]] = atomicrmw add ptr addrspace(7) [[TMP10]], i64 5 monotonic, align 8 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP17]], i64 536870912) -; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], 4 -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP10]], i32 [[TMP20]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP21]], align 4 -; CHECK-NEXT: [[TMP22:%.*]] = add i32 [[TMP20]], 1 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP10]], i32 [[TMP22]] -; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP23]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = add i32 [[TMP22]], 1 -; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP10]], i32 [[TMP24]] -; CHECK-NEXT: store i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]], ptr addrspace(7) [[TMP25]], align 4 -; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP24]], 1 -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP10]], i32 [[TMP26]] -; CHECK-NEXT: store i32 [[TMP14]], ptr addrspace(7) [[TMP27]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @lgc.load.user.data__i32(i32 0) +; CHECK-NEXT: [[TMP3:%.*]] = 
insertelement <2 x i32> [[TMP1]], i32 [[TMP2]], i64 0 +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr addrspace(4) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP6]], align 16 +; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32> [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = call <3 x i32> @lgc.shader.input.WorkgroupId(i32 0) #[[ATTR2:[0-9]+]] +; CHECK-NEXT: [[TMP10:%.*]] = mul <3 x i32> [[TMP9]], +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @lgc.shader.input.LocalInvocationId(i32 49) #[[ATTR2]] +; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 1023 +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <3 x i32> poison, i32 [[TMP12]], i64 0 +; CHECK-NEXT: [[TMP14:%.*]] = lshr i32 [[TMP11]], 10 +; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 1023 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <3 x i32> [[TMP13]], i32 [[TMP15]], i64 1 +; CHECK-NEXT: [[TMP17:%.*]] = lshr i32 [[TMP14]], 10 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <3 x i32> [[TMP16]], i32 [[TMP17]], i64 2 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <3 x i32> [[TMP18]], i32 0, i64 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <3 x i32> [[TMP19]], i32 0, i64 2 +; CHECK-NEXT: [[TMP21:%.*]] = call <3 x i32> @lgc.reconfigure.local.invocation.id(<3 x i32> [[TMP20]], i32 0) #[[ATTR2]] +; CHECK-NEXT: [[TMP22:%.*]] = add <3 x i32> [[TMP10]], [[TMP21]] +; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP22]], i64 0 +; CHECK-NEXT: [[TMP23:%.*]] = atomicrmw add ptr addrspace(7) [[TMP8]], i64 3 monotonic, align 8 +; CHECK-NEXT: [[TMP24:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP23]], i64 536870912) +; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP24]] to i32 +; CHECK-NEXT: [[TMP26:%.*]] = add i32 [[TMP25]], 4 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr 
addrspace(7) [[TMP8]], i32 [[TMP26]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP27]], align 4 ; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP26]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP10]], i32 [[TMP28]] -; CHECK-NEXT: store i32 [[TMP16]], ptr addrspace(7) [[TMP29]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP28]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP29]], align 4 ; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP30]] +; CHECK-NEXT: store i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]], ptr addrspace(7) [[TMP31]], align 4 +; CHECK-NEXT: [[TMP32:%.*]] = add i32 [[TMP30]], 1 +; CHECK-NEXT: [[TMP33:%.*]] = atomicrmw add ptr addrspace(7) [[TMP8]], i64 6 monotonic, align 8 +; CHECK-NEXT: [[TMP34:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP33]], i64 536870912) +; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[TMP34]] to i32 +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP35]], 4 +; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP36]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP37]], align 4 +; CHECK-NEXT: [[TMP38:%.*]] = add i32 [[TMP36]], 1 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP38]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP39]], align 4 +; CHECK-NEXT: [[TMP40:%.*]] = add i32 [[TMP38]], 1 +; CHECK-NEXT: [[TMP41:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP40]] +; CHECK-NEXT: store i32 0, ptr addrspace(7) [[TMP41]], align 4 +; CHECK-NEXT: [[TMP42:%.*]] = add i32 [[TMP40]], 1 +; CHECK-NEXT: [[TMP43:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP42]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP43]], align 4 +; CHECK-NEXT: [[TMP44:%.*]] = add i32 [[TMP42]], 1 +; CHECK-NEXT: [[TMP45:%.*]] = 
getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP44]] +; CHECK-NEXT: store i32 0, ptr addrspace(7) [[TMP45]], align 4 +; CHECK-NEXT: [[TMP46:%.*]] = add i32 [[TMP44]], 1 +; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i32, ptr addrspace(7) [[TMP8]], i32 [[TMP46]] +; CHECK-NEXT: store i32 {{-?[0-9]+}}, ptr addrspace(7) [[TMP47]], align 4 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP46]], 1 ; CHECK-NEXT: ret void ; diff --git a/lgc/test/CsReconfigWorkgroup.lgc b/lgc/test/CsReconfigWorkgroup.lgc index 0ef36e6515..3235ced729 100644 --- a/lgc/test/CsReconfigWorkgroup.lgc +++ b/lgc/test/CsReconfigWorkgroup.lgc @@ -51,8 +51,7 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc %2 = bitcast i8 addrspace(7)* %0 to <3 x i32> addrspace(7)* store <3 x i32> %1, <3 x i32> addrspace(7)* %2, align 4 %imgdescptr = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.v8i32(i32 1, i32 0, i32 0, i32 1) - %imgdesc = load <8 x i32>, <8 x i32> addrspace(4)* %imgdescptr - %imgload = call <2 x float> (...) @lgc.create.image.load.v2f32(i32 1, i32 0, <8 x i32> %imgdesc, <2 x i32>) + %imgload = call <2 x float> (...) @lgc.create.image.load.v2f32(i32 1, i32 0, <8 x i32> addrspace(4)* %imgdescptr, <2 x i32>) %storeptr = getelementptr i8, i8 addrspace(7)* %0, i64 16 %storeptrcast = bitcast i8 addrspace(7)* %storeptr to <2 x float> addrspace(7)* store <2 x float> %imgload, <2 x float> addrspace(7)* %storeptrcast @@ -99,8 +98,7 @@ define dllexport spir_func void @lgc.shader.CS.main() local_unnamed_addr #0 !lgc %2 = bitcast i8 addrspace(7)* %0 to <3 x i32> addrspace(7)* store <3 x i32> %1, <3 x i32> addrspace(7)* %2, align 4 %imgdescptr = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.v8i32(i32 1, i32 0, i32 0, i32 1) - %imgdesc = load <8 x i32>, <8 x i32> addrspace(4)* %imgdescptr - %imgload = call <2 x float> (...) @lgc.create.image.load.v2f32(i32 1, i32 0, <8 x i32> %imgdesc, <2 x i32>) + %imgload = call <2 x float> (...) 
@lgc.create.image.load.v2f32(i32 1, i32 0, <8 x i32> addrspace(4)* %imgdescptr, <2 x i32>) %storeptr = getelementptr i8, i8 addrspace(7)* %0, i64 16 %storeptrcast = bitcast i8 addrspace(7)* %storeptr to <2 x float> addrspace(7)* store <2 x float> %imgload, <2 x float> addrspace(7)* %storeptrcast diff --git a/lgc/test/ElfRelocationSize.lgc b/lgc/test/ElfRelocationSize.lgc index e77214a9c1..3125c72577 100644 --- a/lgc/test/ElfRelocationSize.lgc +++ b/lgc/test/ElfRelocationSize.lgc @@ -66,12 +66,10 @@ entry: %0 = extractelement <2 x float> %texcoordadj, i32 0 %1 = extractelement <2 x float> %texcoordadj, i32 1 %2 = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 0, i32 0, i32 1) - %3 = load <8 x i32>, <8 x i32> addrspace(4)* %2, align 32 %4 = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 0, i32 0, i32 2) - %5 = load <4 x i32>, <4 x i32> addrspace(4)* %4, align 16 %6 = insertelement <2 x float> undef, float %0, i64 0 %7 = insertelement <2 x float> %6, float %1, i64 1 - %8 = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> %3, <4 x i32> %5, i32 1, <2 x float> %7) + %8 = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> addrspace(4)* %2, <4 x i32> addrspace(4)* %4, i32 1, <2 x float> %7) %9 = extractelement <4 x float> %8, i64 0 %10 = insertvalue %types.ResRet.f32.1 undef, float %9, 0 %11 = extractelement <4 x float> %8, i64 1 diff --git a/lgc/test/PartPipeline.lgc b/lgc/test/PartPipeline.lgc index a39eb3f36b..d368f9394c 100644 --- a/lgc/test/PartPipeline.lgc +++ b/lgc/test/PartPipeline.lgc @@ -74,12 +74,10 @@ entry: %0 = extractelement <2 x float> %texcoordadj, i32 0 %1 = extractelement <2 x float> %texcoordadj, i32 1 %2 = call <8 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 0, i32 1) - %3 = load <8 x i32>, <8 x i32> addrspace(4)* %2, align 32 %4 = call <4 x i32> addrspace(4)* (...) 
@lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 2) - %5 = load <4 x i32>, <4 x i32> addrspace(4)* %4, align 16 %6 = insertelement <2 x float> undef, float %0, i64 0 %7 = insertelement <2 x float> %6, float %1, i64 1 - %8 = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> %3, <4 x i32> %5, i32 1, <2 x float> %7) + %8 = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> addrspace(4)* %2, <4 x i32> addrspace(4)* %4, i32 1, <2 x float> %7) %9 = extractelement <4 x float> %8, i64 0 %10 = insertvalue %types.ResRet.f32.1 undef, float %9, 0 %11 = extractelement <4 x float> %8, i64 1 diff --git a/lgc/test/PatchInvalidImageDescriptor.lgc b/lgc/test/PatchInvalidImageDescriptor.lgc index 32a48a7f9f..debed60f1e 100644 --- a/lgc/test/PatchInvalidImageDescriptor.lgc +++ b/lgc/test/PatchInvalidImageDescriptor.lgc @@ -4,7 +4,7 @@ ; CHECK-LABEL: IR Dump After Patch LLVM for workarounds ; GFX1010: extractelement <8 x i32> %{{[0-9]+}}, i64 3 -; GFX1010-NEXT: icmp sge i32 +; GFX1010: icmp sge i32 ; GFX1010-NEXT: and i32 ; GFX1010-NEXT: select i1 ; GFX1010-NEXT: [[PATCHED_DESC0:%[.a-zA-Z0-9]+]] = insertelement <8 x i32> %{{[0-9]+}} @@ -12,7 +12,7 @@ ; GFX1010: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> zeroinitializer, i32 15, i32 0, i32 0, <8 x i32> %{{[0-9]+}}, i32 0, i32 0) -; GFX1010: %.sample = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %.sampler, i1 false, i32 0, i32 0) +; GFX1010: %.sample = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, i1 false, i32 0, i32 0) ; GFX1010: %.gather = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, <8 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, i1 false, i32 0, i32 0) @@ -40,22 +40,20 @@ define 
dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc %.desc.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc.ptr2 to i8 addrspace(4)* %.desc.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc.ptr1, i64 0 %.desc.ptr = bitcast i8 addrspace(4)* %.desc.ptr0 to <8 x i32> addrspace(4)* - %.desc = load <8 x i32>, <8 x i32> addrspace(4)* %.desc.ptr, align 32 %.sampler.ptr = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 13) - %.sampler = load <4 x i32>, <4 x i32> addrspace(4)* %.sampler.ptr, align 16 - %.load = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, <8 x i32> %.desc, i32 1) - call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, <8 x i32> %.desc, <2 x i32> zeroinitializer) + %.load = call <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 1) + call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <2 x i32> zeroinitializer) - %.sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 128, <8 x i32> %.desc, <4 x i32> %.sampler, i32 1, <2 x float> zeroinitializer) - %.gather = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, <8 x i32> %.desc, <4 x i32> %.sampler, i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) + %.sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 1, <2 x float> zeroinitializer) + %.gather = call <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, i32 37, <2 x float> zeroinitializer, i32 0, float 0.000000e+00) - %.atomic = call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, <8 x i32> %.desc, i32 0, i32 1) #0 + %.atomic = call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 0, i32 128, i32 0, <8 x i32> addrspace(4)* %.desc.ptr, i32 0, i32 1) #0 - %.lod = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, <8 x i32> %.desc, <4 x i32> %.sampler, <2 x float> zeroinitializer) + %.lod = call <2 x float> (...) @lgc.create.image.get.lod.v2f32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, <4 x i32> addrspace(4)* %.sampler.ptr, <2 x float> zeroinitializer) - %.query.size = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, <8 x i32> %.desc, i32 0) - %.query.levels = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x i32> %.desc) + %.query.size = call <2 x i32> (...) @lgc.create.image.query.size.v2i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr, i32 0) + %.query.levels = call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x i32> addrspace(4)* %.desc.ptr) %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) ; just some source of divergence %ofs = mul i32 %lane, 32 @@ -65,8 +63,7 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !lgc %.desc2.ptr1 = bitcast <8 x i32> addrspace(4)* %.desc2.ptr2 to i8 addrspace(4)* %.desc2.ptr0 = getelementptr i8, i8 addrspace(4)* %.desc2.ptr1, i32 %ofs %.desc2.ptr = bitcast i8 addrspace(4)* %.desc2.ptr0 to <8 x i32> addrspace(4)* - %.desc2 = load <8 x i32>, <8 x i32> addrspace(4)* %.desc2.ptr, align 32 - call void (...) @lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, <8 x i32> %.desc2, i32 zeroinitializer) + call void (...) 
@lgc.create.image.store(<4 x float> zeroinitializer, i32 0, i32 8, <8 x i32> addrspace(4)* %.desc2.ptr, i32 zeroinitializer) ret void } diff --git a/lgc/test/SubgroupClusteredReduction.lgc b/lgc/test/SubgroupClusteredReduction.lgc new file mode 100644 index 0000000000..40b53f9584 --- /dev/null +++ b/lgc/test/SubgroupClusteredReduction.lgc @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py UTC_ARGS: --tool lgc +; RUN: lgc -o - --mcpu=gfx1100 --emit-llvm %s | FileCheck -check-prefixes=CHECK %s + +define dllexport spir_func i32 @fn(i32 %value1, i32 %value2) !lgc.shaderstage !0 { +.entry: + %r1 = call i32 (...) @lgc.create.subgroup.clustered.reduction.i32(i32 11, i32 %value1, i32 64) + %r2 = call i32 (...) @lgc.create.subgroup.clustered.reduction.i32(i32 11, i32 %value2, i32 32) + %r = add i32 %r1, %r2 + ret i32 %r +} + +declare i32 @lgc.create.subgroup.clustered.reduction.i32(...) + +; ShaderStage::Compute +!0 = !{i32 7} + +; Setting Threadgroup Dimensions to 64 x 1 x 1 +!llpc.compute.mode = !{!1} +!1 = !{i32 64, i32 1, i32 1} +; CHECK-LABEL: @_amdgpu_cs_main( +; CHECK-NEXT: .entry: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[VALUE1:%.*]], i32 0) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP0]], i32 177, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP2]], i32 78, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP4]], i32 321, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP6]], i32 320, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call 
i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[TMP8]], i32 -1, i32 -1, i1 true, i1 false) +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.permlane64(i32 [[TMP10]]) +; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP12]]) +; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP13]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[VALUE2:%.*]], i32 0) +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP14]], i32 177, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[TMP14]] +; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP16]], i32 78, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP16]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP18]], i32 321, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[TMP20]], i32 320, i32 15, i32 15, i1 true) +; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[TMP22]], i32 -1, i32 -1, i1 true, i1 false) +; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]] +; CHECK-NEXT: [[R2:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[TMP24]]) +; CHECK-NEXT: [[R:%.*]] = add i32 [[R2]], [[R1]] +; CHECK-NEXT: ret i32 [[R]] +; diff --git a/lgc/test/TaskShaderOps.lgc b/lgc/test/TaskShaderOps.lgc index 16d98df3dd..88a85fee82 100644 --- a/lgc/test/TaskShaderOps.lgc +++ b/lgc/test/TaskShaderOps.lgc @@ -51,7 +51,7 @@ ; CHECK-NEXT: [[meshPipeStatsBufAddr64:%[0-9]*]] = bitcast <2 x i32> [[meshPipeStatsBufAddr2x32]] to i64 ; CHECK-NEXT: [[meshPipeStatsBufAddr:%[0-9]*]] = inttoptr i64 
[[meshPipeStatsBufAddr64]] to ptr addrspace(1) ; CHECK: [[numTaskThreadsPtr8:%[0-9]*]] = getelementptr i8, ptr addrspace(1) [[meshPipeStatsBufAddr]], i64 16 -; CHECK: %{{[0-9]*}} = atomicrmw add ptr addrspace(1) [[numTaskThreadsPtr8]], i64 %{{[0-9]*}} monotonic, align 8 +; CHECK: %{{[0-9]*}} = atomicrmw add ptr addrspace(1) [[numTaskThreadsPtr8]], i64 %{{[0-9]*}} syncscope("agent") monotonic, align 8 ; CHECK: [[ringSize:%[0-9]*]] = extractelement <4 x i32> [[drawDataRingDesc]], i64 2 ; CHECK-NEXT: [[numEntries:%[0-9]*]] = lshr i32 [[ringSize]], 4 ; CHECK-NEXT: [[wrapMask:%[0-9]*]] = add nuw nsw i32 [[numEntries]], 268435455 diff --git a/lgc/test/TestWaterfallLoopForStruct.lgc b/lgc/test/TestWaterfallLoopForStruct.lgc index 448a7ceee2..c43691bddf 100644 --- a/lgc/test/TestWaterfallLoopForStruct.lgc +++ b/lgc/test/TestWaterfallLoopForStruct.lgc @@ -17,10 +17,7 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi %6 = mul i32 %0, %2 %7 = sext i32 %6 to i64 %8 = getelementptr i8, ptr addrspace(4) %1, i64 %7 - %9 = insertvalue { ptr addrspace(4), i32, i32, i32 } %5, ptr addrspace(4) %8, 0 - %10 = load <8 x i32>, ptr addrspace(4) %8, align 32, !invariant.load !12 - %11 = insertvalue [3 x <8 x i32>] poison, <8 x i32> %10, 0 - %12 = call { <4 x float>, i32 } (...) @"lgc.create.image.load.s[v4f32,i32]"(i32 1, i32 8, <8 x i32> %10, <2 x i32> ) + %12 = call { <4 x float>, i32 } (...) 
@"lgc.create.image.load.s[v4f32,i32]"(i32 1, i32 8, ptr addrspace(4) %8, <2 x i32> ) %13 = extractvalue { <4 x float>, i32 } %12, 1 %14 = extractvalue { <4 x float>, i32 } %12, 0 %15 = icmp sgt i32 %13, 0 @@ -92,22 +89,20 @@ attributes #2 = { nounwind willreturn memory(read) } ; CHECK-NEXT: [[TMP11:%.*]] = mul i32 [[TMP2]], 32 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP7]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { ptr addrspace(4), i32, i32, i32 } [[TMP10]], ptr addrspace(4) [[TMP13]], 0 -; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP13]], align 32, !invariant.load !12 -; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [3 x <8 x i32>] poison, <8 x i32> [[TMP15]], 0 -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP11]]) -; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP17]], <8 x i32> [[TMP15]]) -; CHECK-NEXT: [[TMP19:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 1, i32 1, <8 x i32> [[TMP18]], i32 1, i32 0) -; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <4 x float>, i32 } [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = call <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP17]], <4 x float> [[TMP20]]) -; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <4 x float>, i32 } [[TMP19]], 1 -; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.waterfall.end.i32(i32 [[TMP17]], i32 [[TMP22]]) -; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <4 x float>, i32 } poison, <4 x float> [[TMP21]], 0 -; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <4 x float>, i32 } [[TMP24]], i32 [[TMP23]], 1 -; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <4 x float>, i32 } [[TMP25]], 1 -; CHECK-NEXT: [[TMP27:%.*]] = extractvalue { <4 x float>, i32 } [[TMP25]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP26]], 0 -; CHECK-NEXT: [[TMP29:%.*]] = select i1 
[[TMP28]], <4 x float> [[TMP27]], <4 x float> zeroinitializer -; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP29]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP13]], align 32, !invariant.load !12 +; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP11]]) +; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP15]], <8 x i32> [[TMP14]]) +; CHECK-NEXT: [[TMP17:%.*]] = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 1, i32 1, <8 x i32> [[TMP16]], i32 1, i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x float>, i32 } [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = call <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP15]], <4 x float> [[TMP18]]) +; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <4 x float>, i32 } [[TMP17]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.waterfall.end.i32(i32 [[TMP15]], i32 [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { <4 x float>, i32 } poison, <4 x float> [[TMP19]], 0 +; CHECK-NEXT: [[TMP23:%.*]] = insertvalue { <4 x float>, i32 } [[TMP22]], i32 [[TMP21]], 1 +; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { <4 x float>, i32 } [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { <4 x float>, i32 } [[TMP23]], 0 +; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[TMP24]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], <4 x float> [[TMP25]], <4 x float> zeroinitializer +; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP27]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: ret void ; diff --git a/lgc/test/TextureRange.lgc b/lgc/test/TextureRange.lgc index 0f50ab4c50..3cda8fc4f8 100644 --- a/lgc/test/TextureRange.lgc +++ b/lgc/test/TextureRange.lgc @@ -87,11 +87,9 @@ define dllexport spir_func void @lgc.shader.FS.PSMain() local_unnamed_addr #0 !s %13 = fmul reassoc nnan nsz arcp 
contract afn float %10, %12 %14 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 3221225472, i32 1) %15 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 1, i32 1, i64 3221225472, i32 1) - %16 = load <8 x i32>, ptr addrspace(4) %14, align 32 %17 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 2147483648, i32 0) %18 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 2, i32 2, i64 2147483648, i32 0) - %19 = load <4 x i32>, ptr addrspace(4) %17, align 16 - %20 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample__v4f32(i32 1, i32 512, <8 x i32> %16, <4 x i32> %19, i32 1, <2 x float> %6) + %20 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample__v4f32(i32 1, i32 512, ptr addrspace(4) %14, ptr addrspace(4) %17, i32 1, <2 x float> %6) %.splatinsert = insertelement <4 x float> poison, float %13, i64 0 %21 = shufflevector <4 x float> %.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer %scale = fmul reassoc nnan nsz arcp contract afn <4 x float> %20, %21 diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc new file mode 100644 index 0000000000..90ffa98e22 --- /dev/null +++ b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1010muladd.lgc @@ -0,0 +1,32 @@ +; RUN: lgc -march=amdgcn -o - --mcpu=gfx1010 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s + +define void @matmul_f16f32_emulator(ptr addrspace(3) %out0, <8 x float> %a, <8 x float> %b, <8 x float> %c0) !lgc.shaderstage !0 { +; CHECK-NOT: v_dot + %value = call <8 x float> (...) @lgc.cooperative.matrix.muladd__v8f8(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 2, i32 1) + call void (...) 
@lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %value) + ret void +} + +define void @matmul_i16i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { +; CHECK-NOT: v_dot + %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 5, i32 4) + call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value) + ret void +} + +define void @matmul_i8i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { +; CHECK-NOT: v_dot + %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 5, i32 3) + call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value) + ret void +} + +declare <8 x float> @lgc.cooperative.matrix.muladd__v8f8(...) +declare <8 x i32> @lgc.cooperative.matrix.muladd__v8i32(...) +declare void @lgc.cooperative.matrix.store(...) + +; ShaderStage::Compute +!0 = !{i32 7} +; Setting Threadgroup Dimensions to 64 x 1 x 1 +!llpc.compute.mode = !{!1} +!1 = !{i32 64, i32 1, i32 1} diff --git a/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc new file mode 100644 index 0000000000..88292bf642 --- /dev/null +++ b/lgc/test/Transforms/LowerCooperativeMatrix/gfx1011muladd.lgc @@ -0,0 +1,33 @@ +; RUN: lgc -march=amdgcn -o - --mcpu=gfx1011 -filetype=asm %s | FileCheck -check-prefixes=CHECK %s + +define void @matmul_f16f32_emulator(ptr addrspace(3) %out0, <8 x float> %a, <8 x float> %b, <8 x float> %c0) !lgc.shaderstage !0 { +; CHECK: v_dot2c_f32_f16 + %value = call <8 x float> (...) 
@lgc.cooperative.matrix.muladd__v8f8(<8 x float> %a, <8 x float> %b, <8 x float> %c0, i1 true, i1 true, i1 false, i1 false, i32 2, i32 1) + call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> %value) + ret void +} + +define void @matmul_i16i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { +; CHECK: v_dot2_i32_i16 + %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 5, i32 4) + call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value) + ret void +} + +define void @matmul_i8i32_emulator(ptr addrspace(3) %out0, <8 x i32> %a, <8 x i32> %b, <8 x i32> %c0) !lgc.shaderstage !0 { +; CHECK: v_dot4c_i32_i8 + %value = call <8 x i32> (...) @lgc.cooperative.matrix.muladd__v8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c0, i1 true, i1 true, i1 false, i1 false, i32 5, i32 3) + call void (...) @lgc.cooperative.matrix.store(ptr addrspace(3) %out0, i32 4, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x i32> %value) + ret void +} + +declare <8 x float> @lgc.cooperative.matrix.muladd__v8f8(...) +declare <8 x i32> @lgc.cooperative.matrix.muladd__v8i32(...) +declare void @lgc.cooperative.matrix.store(...) 
+ +; ShaderStage::Compute +!0 = !{i32 7} +; Setting Threadgroup Dimensions to 64 x 1 x 1 +!llpc.compute.mode = !{!1} +!1 = !{i32 64, i32 1, i32 1} + diff --git a/lgc/test/Transforms/LowerDebugPrintf/basic.lgc b/lgc/test/Transforms/LowerDebugPrintf/basic.lgc index a20f4e0fd5..312478af57 100644 --- a/lgc/test/Transforms/LowerDebugPrintf/basic.lgc +++ b/lgc/test/Transforms/LowerDebugPrintf/basic.lgc @@ -1,15 +1,19 @@ -; RUN: lgc -o - -passes='require,lgc-lower-debug-printf' %s | FileCheck --check-prefixes=IR %s -; RUN: lgc -o - -passes='require,lgc-lower-debug-printf,print' %s -o /dev/null 2>&1 | FileCheck --check-prefixes=PALMD %s +; RUN: lgc -o - -passes="require,lgc-lower-debug-printf" %s | FileCheck --check-prefixes=IR %s +; RUN: lgc -o - -passes="require,lgc-lower-debug-printf,print" %s -o /dev/null 2>&1 | FileCheck --check-prefixes=PALMD %s -@str = internal addrspace(4) global [8 x i8] c"Test: %u" +@str = private unnamed_addr constant [8 x i8] c"Test: %u", align 1 -define spir_func void @simple(ptr addrspace(7) %buffer) !lgc.shaderstage !0 { +define spir_func void @simple() !lgc.shaderstage !0 { ; IR-LABEL: @simple( ; IR-NOT: call {{.*}} @lgc.debug.printf - call void (...) @lgc.debug.printf(ptr addrspace(7) %buffer, ptr addrspace(4) @str, i32 42) + call void (...) @lgc.debug.printf(ptr nonnull @str, i32 42) ret void } +!lgc.user.data.nodes = !{!4, !5} +!4 = !{!"DescriptorTableVaPtr", i32 7, i32 0, i32 0, i32 1, i32 1} +!5 = !{!"DescriptorBuffer", i32 6, i32 0, i32 0, i32 4, i32 -1, i32 6, i32 4} + ; IR: !amdgpu.pal.metadata.msgpack = ; PALMD: amdpal.format_strings: diff --git a/lgc/test/lgcdis.lgc b/lgc/test/lgcdis.lgc index 7b970b75ac..b90f44f093 100644 --- a/lgc/test/lgcdis.lgc +++ b/lgc/test/lgcdis.lgc @@ -95,10 +95,8 @@ define dllexport void @lgc.shader.FS.main() !lgc.shaderstage !25 { entry: %TEXCOORD = call <2 x float> (...) @lgc.create.read.generic.input.v2f32(i32 1, i32 0, i32 0, i32 1, i32 16, i32 poison) %imageptr = call <8 x i32> addrspace(4)* (...) 
@lgc.create.get.desc.ptr.p4v8i32(i32 1, i32 1, i32 0, i32 1) - %image = load <8 x i32>, <8 x i32> addrspace(4)* %imageptr, align 32 %samplerptr = call <4 x i32> addrspace(4)* (...) @lgc.create.get.desc.ptr.p4v4i32(i32 2, i32 2, i32 0, i32 2) - %sampler = load <4 x i32>, <4 x i32> addrspace(4)* %samplerptr, align 16 - %sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> %image, <4 x i32> %sampler, i32 1, <2 x float> %TEXCOORD) + %sample = call <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> addrspace(4)* %imageptr, <4 x i32> addrspace(4)* %samplerptr, i32 1, <2 x float> %TEXCOORD) call void (...) @lgc.create.write.generic.output(<4 x float> %sample, i32 0, i32 0, i32 0, i32 1, i32 0, i32 poison) ret void } diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc index cf01ecaead..186f904c28 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest1.lgc @@ -8,31 +8,31 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !14 !lgc.shaderstage !15 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !14 !lgc.shaderstage [[META15:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META14:![0-9]+]] !lgc.shaderstage [[META15:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @lgc.input.import.generic__i32(i1 false, i32 0, i32 0, i32 0, i32 poison) ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load 
[[META16:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP7]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP9]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP12]], align 16, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[TMP13]], i32 0, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 0, <8 x i32> [[TMP13]], i32 0, i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP9]], <4 x float> [[TMP14]]) +; CHECK-NEXT: [[TMP22:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP16]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] 
= load <4 x i32>, ptr addrspace(4) [[TMP19]], align 32, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP21:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.last.use.v4i32(i32 [[TMP16]], <4 x i32> [[TMP20]]) -; CHECK-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> [[TMP15]], <4 x i32> [[TMP21]], i32 1, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP19]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP21:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP16]], <8 x i32> [[TMP20]]) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP15]], i32 15, i32 1, <8 x i32> [[TMP21]], i32 0, i32 0) ; CHECK-NEXT: ret void ; .entry: @@ -42,13 +42,11 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi %3 = mul i32 %0, %2 %4 = sext i32 %3 to i64 %5 = getelementptr i8, ptr addrspace(4) %1, i64 %4 - %6 = load <4 x i32>, ptr addrspace(4) %5, align 32, !invariant.load !16 %7 = mul i32 %0, %2 %8 = sext i32 %7 to i64 %9 = getelementptr i8, ptr addrspace(4) %1, i64 %8 - %10 = load <4 x i32>, ptr addrspace(4) %9, align 16, !invariant.load !16 - %11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 8, <4 x i32> %10, i32 0) - call void (...) @lgc.create.image.store(<4 x float> %11, i32 0, i32 8, <4 x i32> %6, i32 1) + %11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 8, ptr addrspace(4) %9, i32 0) + call void (...) @lgc.create.image.store(<4 x float> %11, i32 0, i32 8, ptr addrspace(4) %5, i32 1) ret void } @@ -99,3 +97,8 @@ attributes #3 = { nounwind memory(write) } !14 = !{i32 0} !15 = !{i32 1} !16 = !{} +;. +; CHECK: [[META14]] = !{i32 0} +; CHECK: [[META15]] = !{i32 1} +; CHECK: [[META16]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc index 7835bf62e7..8d8609b8ea 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest10.lgc @@ -10,7 +10,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -44,8 +44,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], [[PHI]] ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 @@ -90,9 +90,7 @@ loop.latch: ; preds = %bb2, %bb1 %i6 = mul i32 %phi.ind, %phi %i7 
= sext i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i5, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) call void (...) @lgc.create.write.generic.output(<4 x float> %i11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) %ind = add i32 %phi.ind, 1 %cond2 = icmp ne i32 %ind, 1000 @@ -147,3 +145,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc index 2a2d450d1b..68797e0c4e 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest11.lgc @@ -10,7 +10,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -44,8 +44,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: br 
label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[PHI:%.*]] = phi ptr addrspace(4) [ [[I5]], [[BB1]] ], [ [[I8]], [[BB2]] ] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[PHI]], align 16, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[PHI]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[PHI]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[PHI]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v8i32(i32 0, <8 x i32> [[I10]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v4i32(i32 [[TMP12]], <4 x i32> [[I9]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP13]], <8 x i32> [[I10]]) @@ -86,9 +86,7 @@ bb2: ; preds = %loop loop.latch: ; preds = %bb2, %bb1 %phi = phi ptr addrspace(4) [ %i5, %bb1 ], [ %i8, %bb2 ] - %i9 = load <4 x i32>, ptr addrspace(4) %phi, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %phi, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %phi, ptr addrspace(4) %phi, i32 1, <2 x float> zeroinitializer) call void (...) @lgc.create.write.generic.output(<4 x float> %i11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) %ind = add i32 %phi.ind, 1 %cond2 = icmp ne i32 %ind, 1000 @@ -143,3 +141,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc index afd0072c01..e5909c6acb 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest12.lgc @@ -11,7 +11,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -38,8 +38,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 @@ -75,9 +75,7 @@ loop: ; preds = %loop, %.entry %i6 = mul i32 %phi.ind, %b %i7 = sext 
i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i5, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) %i12 = fadd <4 x float> %phi.img, %ind = add i32 %phi.ind, 1 %cond = icmp ne i32 %ind, 1000 @@ -133,3 +131,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc index cd41d3a2bf..9cb2e24f8f 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest13.lgc @@ -11,7 +11,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -31,16 +31,15 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I3:%.*]] = mul i32 [[I]], 48 ; CHECK-NEXT: [[I4:%.*]] = sext i32 [[I3]] to i64 ; 
CHECK-NEXT: [[I5:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[I4]] -; CHECK-NEXT: [[L:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[PHI_IND:%.*]] = phi i32 [ 0, [[DOTENTRY:%.*]] ], [ [[IND:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[PHI_LOAD:%.*]] = phi <8 x i32> [ [[L]], [[DOTENTRY]] ], [ [[I10:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PHI_LOAD1:%.*]] = phi ptr addrspace(4) [ [[I5]], [[DOTENTRY]] ], [ [[I8:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 -; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] -; CHECK-NEXT: [[I10]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[I8]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] +; CHECK-NEXT: [[PHI_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[PHI_LOAD1]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v8i32(i32 0, <8 x i32> [[PHI_LOAD]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 [[TMP12]], i32 [[I6]]) ; CHECK-NEXT: [[TMP14:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.readfirstlane.v8i32.v8i32(i32 [[TMP13]], <8 x i32> [[PHI_LOAD]]) @@ -65,19 +64,16 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi %i3 = mul i32 %i, %a %i4 = sext i32 %i3 to i64 %i5 = getelementptr i8, ptr addrspace(4) %i1, i64 %i4 - %l = load <8 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 br label %loop loop: ; preds = %loop, %.entry %phi.ind = phi i32 [ 0, %.entry ], [ %ind, %loop ] - %phi.load = phi <8 x i32> [ %l, %.entry 
], [ %i10, %loop ] + %phi.load = phi ptr addrspace(4) [ %i5, %.entry ], [ %i8, %loop ] %b = call i32 (...) @lgc.create.get.desc.stride__i32(i32 2, i32 2, i64 0, i32 7) %i6 = mul i32 %phi.ind, %b %i7 = sext i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %i8, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %phi.load, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %phi.load, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) call void (...) @lgc.create.write.generic.output(<4 x float> %i11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) %ind = add i32 %phi.ind, 1 %cond = icmp ne i32 %ind, 1000 @@ -132,3 +128,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc index bba218fbc6..34edce02c5 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest14.lgc @@ -12,7 +12,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -39,8 +39,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP25:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 @@ -56,13 +56,14 @@ define dllexport spir_func void @lgc.shader.FS.main() 
local_unnamed_addr #0 !spi ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: +; CHECK-NEXT: [[TMP27:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I6]]) ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP21]], i32 [[I6]]) ; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP24]], align 16, !invariant.load [[META10]] -; CHECK-NEXT: [[TMP26:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.last.use.v4i32(i32 [[TMP21]], <4 x i32> [[TMP25]]) -; CHECK-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> [[I12]], <4 x i32> [[TMP26]], i32 1, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP28:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP24]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP29:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP21]], <8 x i32> [[TMP28]]) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[I12]], i32 15, i32 1, <8 x i32> [[TMP29]], i32 0, i32 0) ; CHECK-NEXT: ret void ; .entry: @@ -82,16 +83,14 @@ loop: ; preds = %loop, %.entry %i6 = mul i32 %phi.ind, %b %i7 = sext i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i5, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) %i12 = fadd <4 x float> %phi.img, %ind = add i32 %phi.ind, 1 %cond = icmp ne i32 %ind, 1000 br i1 %cond, label %loop, label %exit exit: ; preds = %loop - call void (...) @lgc.create.image.store(<4 x float> %i12, i32 0, i32 8, <4 x i32> %i9, i32 1) + call void (...) @lgc.create.image.store(<4 x float> %i12, i32 0, i32 8, ptr addrspace(4) %i8, i32 1) ret void } @@ -140,3 +139,8 @@ attributes #3 = { nounwind memory(write) } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc index c22cf5fc59..ab426db63d 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest15.lgc @@ -11,7 +11,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -38,8 +38,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load 
[[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP25:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 @@ -51,13 +51,14 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I11]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) ; CHECK-NEXT: [[I12:%.*]] = fadd <4 x float> [[PHI_IMG]], +; CHECK-NEXT: [[TMP27:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I6]]) ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP21]], i32 [[I6]]) ; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP22]] to i64 ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP23]] -; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP24]], align 16, !invariant.load [[META10]] -; CHECK-NEXT: [[TMP26:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.last.use.v4i32(i32 [[TMP21]], <4 x i32> [[TMP25]]) -; CHECK-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> [[I12]], <4 x i32> [[TMP26]], i32 1, i32 0, i32 0, i32 0) +; CHECK-NEXT: 
[[TMP28:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP24]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP29:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP21]], <8 x i32> [[TMP28]]) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[I12]], i32 15, i32 1, <8 x i32> [[TMP29]], i32 0, i32 0) ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -81,11 +82,9 @@ loop: ; preds = %loop, %.entry %i6 = mul i32 %phi.ind, %b %i7 = sext i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i5, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) %i12 = fadd <4 x float> %phi.img, - call void (...) @lgc.create.image.store(<4 x float> %i12, i32 0, i32 8, <4 x i32> %i9, i32 1) + call void (...) @lgc.create.image.store(<4 x float> %i12, i32 0, i32 8, ptr addrspace(4) %i8, i32 1) %ind = add i32 %phi.ind, 1 %cond = icmp ne i32 %ind, 1000 br i1 %cond, label %loop, label %exit @@ -139,3 +138,8 @@ attributes #3 = { nounwind memory(write) } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc index 573c55b677..39f2ed7d68 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest16.lgc @@ -11,7 +11,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -37,16 +37,16 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[PHI_IND]], 48 ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <4 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[I10:%.*]] = load <4 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] ; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[I10]] to <4 x float> +; CHECK-NEXT: [[TMP17:%.*]] = load <8 x i32>, ptr addrspace(4) [[I8]], align 32, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I6]]) ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP13]], i32 [[I6]]) ; CHECK-NEXT: [[TMP15:%.*]] = sext i32 [[TMP14]] to i64 ; CHECK-NEXT: 
[[TMP16:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP16]], align 16, !invariant.load [[META10]] -; CHECK-NEXT: [[TMP18:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.last.use.v4i32(i32 [[TMP13]], <4 x i32> [[TMP17]]) -; CHECK-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> [[TMP12]], <4 x i32> [[TMP18]], i32 1, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP18:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP16]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP19:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP13]], <8 x i32> [[TMP18]]) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP12]], i32 15, i32 1, <8 x i32> [[TMP19]], i32 0, i32 0) ; CHECK-NEXT: [[IND]] = add i32 [[PHI_IND]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[IND]], 1000 ; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -69,9 +69,8 @@ loop: ; preds = %loop, %.entry %i6 = mul i32 %phi.ind, %b %i7 = sext i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 %i10 = load <4 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 - call void (...) @lgc.create.image.store(<4 x i32> %i10, i32 0, i32 8, <4 x i32> %i9, i32 1) + call void (...) @lgc.create.image.store(<4 x i32> %i10, i32 0, i32 8, ptr addrspace(4) %i8, i32 1) %ind = add i32 %phi.ind, 1 %cond = icmp ne i32 %ind, 1000 br i1 %cond, label %loop, label %exit @@ -125,3 +124,8 @@ attributes #3 = { nounwind memory(write) } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc index bca91884fb..c3ed6f1215 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest2.lgc @@ -8,23 +8,23 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !14 !lgc.shaderstage !15 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !14 !lgc.shaderstage [[META15:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META14:![0-9]+]] !lgc.shaderstage [[META15:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @lgc.input.import.generic__i32(i1 false, i32 0, i32 0, i32 0, i32 poison) ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP3]], align 16, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP2]] -; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <4 x float> +; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP8]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: 
[[TMP11:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP11]], align 16, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.last.use.v4i32(i32 [[TMP8]], <4 x i32> [[TMP12]]) -; CHECK-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> [[TMP7]], <4 x i32> [[TMP13]], i32 1, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP12:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP11]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP8]], <8 x i32> [[TMP12]]) +; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP7]], i32 15, i32 1, <8 x i32> [[TMP13]], i32 0, i32 0) ; CHECK-NEXT: ret void ; .entry: @@ -34,10 +34,9 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi %3 = mul i32 %0, %2 %4 = sext i32 %3 to i64 %5 = getelementptr i8, ptr addrspace(4) %1, i64 %4 - %6 = load <4 x i32>, ptr addrspace(4) %5, align 16, !invariant.load !16 %7 = getelementptr i8, ptr addrspace(4) %1, i64 %4 %8 = load <4 x i32>, ptr addrspace(4) %7, align 16, !invariant.load !16 - call void (...) @lgc.create.image.store(<4 x i32> %8, i32 0, i32 8, <4 x i32> %6, i32 1) + call void (...) @lgc.create.image.store(<4 x i32> %8, i32 0, i32 8, ptr addrspace(4) %5, i32 1) ret void } @@ -86,3 +85,8 @@ attributes #4 = { nounwind } !14 = !{i32 0} !15 = !{i32 1} !16 = !{} +;. +; CHECK: [[META14]] = !{i32 0} +; CHECK: [[META15]] = !{i32 1} +; CHECK: [[META16]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc index 21d460b530..4616e701a2 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest3.lgc @@ -8,7 +8,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !14 !lgc.shaderstage !15 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !14 !lgc.shaderstage [[META15:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META14:![0-9]+]] !lgc.shaderstage [[META15:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @lgc.input.import.generic__i32(i1 false, i32 0, i32 0, i32 0, i32 poison) ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], poison @@ -18,12 +18,12 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP9]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr 
addrspace(4) [[TMP12]], align 32, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP13]], <4 x i32> , i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP9]], <4 x float> [[TMP14]]) ; CHECK-NEXT: ret void @@ -39,8 +39,7 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi %7 = mul i32 %0, %2 %8 = sext i32 %7 to i64 %9 = getelementptr i8, ptr addrspace(4) %1, i64 %8 - %10 = load <8 x i32>, ptr addrspace(4) %9, align 16, !invariant.load !16 - %11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %10, <4 x i32> , i32 1, <2 x float> zeroinitializer) + %11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %9, <4 x i32> , i32 1, <2 x float> zeroinitializer) ret void } @@ -87,3 +86,8 @@ attributes #2 = { nounwind memory(none) } !14 = !{i32 0} !15 = !{i32 1} !16 = !{} +;. +; CHECK: [[META14]] = !{i32 0} +; CHECK: [[META15]] = !{i32 1} +; CHECK: [[META16]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc index a6076a3787..1fbc044bd6 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest4.lgc @@ -10,7 +10,7 @@ declare <4 x i32> @foo1(i32 %V) ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !14 !lgc.shaderstage !15 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !14 !lgc.shaderstage [[META15:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META14:![0-9]+]] !lgc.shaderstage [[META15:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @lgc.input.import.generic__i32(i1 false, i32 0, i32 0, i32 0, i32 poison) ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], poison @@ -20,14 +20,15 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 16, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP9:%.*]] = call <4 x i32> @foo1(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(4) @foo1(i32 [[TMP0]]) +; CHECK-NEXT: [[TMP19:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP8]], align 16, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.v4i32(i32 [[TMP10]], <4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i32 
@llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP11]], i32 [[TMP5]]) ; CHECK-NEXT: [[TMP13:%.*]] = sext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP15:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 32, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP16:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.readfirstlane.v4i32.v4i32(i32 [[TMP11]], <4 x i32> [[TMP9]]) ; CHECK-NEXT: [[TMP17:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP15]], <4 x i32> [[TMP16]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP18:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP11]], <4 x float> [[TMP17]]) @@ -44,9 +45,8 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi %7 = mul i32 %0, %2 %8 = sext i32 %7 to i64 %9 = getelementptr i8, ptr addrspace(4) %1, i64 %8 - %10 = load <8 x i32>, ptr addrspace(4) %9, align 16, !invariant.load !16 - %11 = call <4 x i32> @foo1(i32 %0) - %12 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %10, <4 x i32> %11, i32 1, <2 x float> zeroinitializer) + %11 = call ptr addrspace(4) @foo1(i32 %0) + %12 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %9, ptr addrspace(4) %11, i32 1, <2 x float> zeroinitializer) ret void } @@ -93,3 +93,8 @@ attributes #2 = { nounwind memory(none) } !14 = !{i32 0} !15 = !{i32 1} !16 = !{} +;. +; CHECK: [[META14]] = !{i32 0} +; CHECK: [[META15]] = !{i32 1} +; CHECK: [[META16]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc index 6cb1fdfdcc..085be16a66 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest5.lgc @@ -8,7 +8,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spirv.ExecutionModel !14 !lgc.shaderstage !15 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.VS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !14 !lgc.shaderstage [[META15:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META14:![0-9]+]] !lgc.shaderstage [[META15:![0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @lgc.input.import.generic__i32(i1 false, i32 0, i32 0, i32 0, i32 poison) ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP0]], 0 @@ -17,25 +17,25 @@ define dllexport spir_func void @lgc.shader.VS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP3]], align 16, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], poison ; CHECK-NEXT: [[TMP6:%.*]] = sext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP7]], align 16, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP7]], align 32, !invariant.load [[META16:![0-9]+]] ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP5]]) ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP9]], i32 
[[TMP5]]) ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP12]], align 16, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> [[TMP13]], i32 0, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP12]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 0, <8 x i32> [[TMP13]], i32 0, i32 0) ; CHECK-NEXT: [[TMP15:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP9]], <4 x float> [[TMP14]]) +; CHECK-NEXT: [[TMP22:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP3]], align 32, !invariant.load [[META16]] ; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP1]]) ; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP16]], i32 [[TMP1]]) ; CHECK-NEXT: [[TMP18:%.*]] = sext i32 [[TMP17]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr addrspace(4) poison, i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP19]], align 16, !invariant.load [[META16]] -; CHECK-NEXT: [[TMP21:%.*]] = call <4 x i32> @llvm.amdgcn.waterfall.last.use.v4i32(i32 [[TMP16]], <4 x i32> [[TMP20]]) -; CHECK-NEXT: call void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float> [[TMP15]], <4 x i32> [[TMP21]], i32 1, i32 0, i32 0, i32 0) +; CHECK-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP19]], align 32, !invariant.load [[META16]] +; CHECK-NEXT: [[TMP21:%.*]] = call <8 x i32> @llvm.amdgcn.waterfall.last.use.v8i32(i32 [[TMP16]], <8 x i32> [[TMP20]]) +; CHECK-NEXT: call void 
@llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[TMP15]], i32 15, i32 1, <8 x i32> [[TMP21]], i32 0, i32 0) ; CHECK-NEXT: br label [[RET]] ; CHECK: ret: ; CHECK-NEXT: ret void @@ -51,13 +51,11 @@ bb: ; preds = %entry %3 = mul i32 %0, %2 %4 = sext i32 %3 to i64 %5 = getelementptr i8, ptr addrspace(4) %1, i64 %4 - %6 = load <4 x i32>, ptr addrspace(4) %5, align 16, !invariant.load !16 %7 = mul i32 %0, %2 %8 = sext i32 %7 to i64 %9 = getelementptr i8, ptr addrspace(4) %1, i64 %8 - %10 = load <4 x i32>, ptr addrspace(4) %9, align 16, !invariant.load !16 - %11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 8, <4 x i32> %10, i32 0) - call void (...) @lgc.create.image.store(<4 x float> %11, i32 0, i32 8, <4 x i32> %6, i32 1) + %11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 8, ptr addrspace(4) %9, i32 0) + call void (...) @lgc.create.image.store(<4 x float> %11, i32 0, i32 8, ptr addrspace(4) %5, i32 1) br label %ret ret: ; preds = %bb, %entry @@ -111,3 +109,8 @@ attributes #3 = { nounwind memory(write) } !14 = !{i32 0} !15 = !{i32 1} !16 = !{} +;. +; CHECK: [[META14]] = !{i32 0} +; CHECK: [[META15]] = !{i32 1} +; CHECK: [[META16]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc index cf57f85ba7..42398f1ce4 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest6.lgc @@ -8,7 +8,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !22 !lgc.shaderstage !23 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !22 !lgc.shaderstage [[META23:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META22:![0-9]+]] !lgc.shaderstage [[META23:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -35,8 +35,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i32> [[TMP15]] to i64 ; CHECK-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr addrspace(4) ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP17]], i32 0 -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META24:![0-9]+]] -; CHECK-NEXT: [[TMP20:%.*]] = call <4 x i32> @llvm.amdgcn.struct.buffer.load.format.v4i32(<4 x i32> [[TMP19]], i32 [[DOT0]], i32 0, i32 0, i32 0), !invariant.load [[META24]] +; CHECK-NEXT: [[TMP19:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP18]], align 32, !invariant.load [[META24:![0-9]+]] +; CHECK-NEXT: [[TMP20:%.*]] = call <4 x i32> @llvm.amdgcn.image.load.1d.v4i32.i32(i32 15, i32 [[DOT0]], <8 x i32> [[TMP19]], i32 0, i32 0), !invariant.load [[META24]] ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP20]], i64 0 ; CHECK-NEXT: [[TMP22:%.*]] = call i32 @lgc.load.user.data__i32(i32 36) ; CHECK-NEXT: [[TMP23:%.*]] 
= insertelement <2 x i32> [[TMP3]], i32 [[TMP22]], i64 0 @@ -46,12 +46,12 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP27:%.*]] = mul i32 [[TMP21]], 32 ; CHECK-NEXT: [[TMP28:%.*]] = sext i32 [[TMP27]] to i64 ; CHECK-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP28]] -; CHECK-NEXT: [[TMP30:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP29]], align 32, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP31:%.*]] = call i32 @lgc.load.user.data__i32(i32 36) ; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP31]], i64 0 ; CHECK-NEXT: [[TMP33:%.*]] = bitcast <2 x i32> [[TMP32]] to i64 ; CHECK-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr addrspace(4) ; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP34]], i32 0 +; CHECK-NEXT: [[TMP52:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP29]], align 32, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP36:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 16, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP37:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP27]]) ; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP37]], i32 [[TMP27]]) @@ -64,26 +64,29 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[TMP45:%.*]] = sext i32 [[TMP44]] to i64 ; CHECK-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP45]] ; CHECK-NEXT: [[TMP47:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP46]], align 32, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP59:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 16, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP48:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP44]]) ; CHECK-NEXT: [[TMP49:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP48]], i32 [[TMP44]]) ; CHECK-NEXT: [[TMP50:%.*]] = sext i32 
[[TMP49]] to i64 ; CHECK-NEXT: [[TMP51:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP50]] -; CHECK-NEXT: [[TMP52:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP51]], align 32, !invariant.load [[META24]] -; CHECK-NEXT: [[TMP53:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP52]], <4 x i32> [[TMP36]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[TMP67:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP51]], align 32, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP53:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP67]], <4 x i32> [[TMP59]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP54:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP48]], <4 x float> [[TMP53]]) +; CHECK-NEXT: [[TMP68:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP29]], align 32, !invariant.load [[META24]] +; CHECK-NEXT: [[TMP69:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP35]], align 16, !invariant.load [[META24]] ; CHECK-NEXT: [[TMP55:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[TMP27]]) ; CHECK-NEXT: [[TMP56:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP55]], i32 [[TMP27]]) ; CHECK-NEXT: [[TMP57:%.*]] = sext i32 [[TMP56]] to i64 ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP26]], i64 [[TMP57]] -; CHECK-NEXT: [[TMP59:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP58]], align 32, !invariant.load [[META24]] -; CHECK-NEXT: [[TMP60:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP59]], <4 x i32> [[TMP36]], i1 false, i32 0, i32 0) +; CHECK-NEXT: [[TMP70:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP58]], align 32, !invariant.load [[META24]] +; 
CHECK-NEXT: [[TMP60:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP70]], <4 x i32> [[TMP69]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[TMP61:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP55]], <4 x float> [[TMP60]]) ; CHECK-NEXT: [[TMP62]] = fadd reassoc nnan nsz arcp contract afn <4 x float> [[DOT09]], [[TMP61]] ; CHECK-NEXT: [[TMP63:%.*]] = fadd reassoc nnan nsz arcp contract afn <4 x float> [[TMP43]], [[TMP54]] ; CHECK-NEXT: [[TMP64]] = fadd reassoc nnan nsz arcp contract afn <4 x float> [[DOT010]], [[TMP63]] ; CHECK-NEXT: [[TMP65]] = add i32 [[DOT0]], 1 ; CHECK-NEXT: br label [[TMP9]], !llvm.loop [[LOOP25:![0-9]+]] -; CHECK: 66: +; CHECK: 69: ; CHECK-NEXT: ret void ; .entry: @@ -105,24 +108,20 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi 7: ; preds = %3 %8 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 4, i32 4, i64 1, i32 12) %9 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 4, i32 4, i64 1, i32 12) - %10 = load <4 x i32>, ptr addrspace(4) %8, align 16, !invariant.load !24 - %11 = call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 0, i32 1536, <4 x i32> %10, i32 %.0) + %11 = call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 0, i32 1536, ptr addrspace(4) %8, i32 %.0) %12 = extractelement <4 x i32> %11, i64 0 %13 = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 6) %14 = call i32 (...) @lgc.create.get.desc.stride__i32(i32 1, i32 1, i64 0, i32 6) %15 = mul i32 %12, %14 %16 = sext i32 %15 to i64 %17 = getelementptr i8, ptr addrspace(4) %13, i64 %16 - %18 = load <8 x i32>, ptr addrspace(4) %17, align 32, !invariant.load !24 %19 = call ptr addrspace(4) (...) 
@lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 5) - %20 = load <4 x i32>, ptr addrspace(4) %19, align 16, !invariant.load !24 - %21 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %18, <4 x i32> %20, i32 1, <2 x float> zeroinitializer) + %21 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %17, ptr addrspace(4) %19, i32 1, <2 x float> zeroinitializer) %22 = mul i32 %1, %14 %23 = sext i32 %22 to i64 %24 = getelementptr i8, ptr addrspace(4) %13, i64 %23 - %25 = load <8 x i32>, ptr addrspace(4) %24, align 32, !invariant.load !24 - %26 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %25, <4 x i32> %20, i32 1, <2 x float> zeroinitializer) - %27 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %18, <4 x i32> %20, i32 1, <2 x float> zeroinitializer) + %26 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %24, ptr addrspace(4) %19, i32 1, <2 x float> zeroinitializer) + %27 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %17, ptr addrspace(4) %19, i32 1, <2 x float> zeroinitializer) %28 = fadd reassoc nnan nsz arcp contract afn <4 x float> %.09, %27 %29 = fadd reassoc nnan nsz arcp contract afn <4 x float> %21, %26 %30 = fadd reassoc nnan nsz arcp contract afn <4 x float> %.010, %29 @@ -191,3 +190,9 @@ attributes #2 = { nounwind memory(none) } !23 = !{i32 6} !24 = !{} !25 = distinct !{!25} +;. +; CHECK: [[META22]] = !{i32 4} +; CHECK: [[META23]] = !{i32 6} +; CHECK: [[META24]] = !{} +; CHECK: [[LOOP25]] = distinct !{[[LOOP25]]} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc index 445a355b8d..4b1edb4ae3 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest7.lgc @@ -10,7 +10,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -37,18 +37,15 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I8:%.*]] = mul i32 [[I]], 48 ; CHECK-NEXT: [[I9:%.*]] = sext i32 [[I8]] to i64 ; CHECK-NEXT: [[I10:%.*]] = getelementptr i8, ptr addrspace(4) [[I3]], i64 [[I9]] -; CHECK-NEXT: [[I11:%.*]] = load <4 x i32>, ptr addrspace(4) [[I10]], align 16, !invariant.load [[META10:![0-9]+]] ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: [[I12:%.*]] = load <8 x i32>, ptr addrspace(4) [[I7]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP14:%.*]] = load <8 x i32>, ptr addrspace(4) [[I7]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[I3]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I5]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I5]]) -; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[TMP15:%.*]] = 
getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP13]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I3]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP18]], align 16, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr addrspace(4) [[I1]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP16:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP18]], align 32, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP20:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[TMP16]], <4 x i32> [[TMP19]], i1 false, i32 0, i32 0) ; CHECK-NEXT: [[I13:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.waterfall.end.v4f32(i32 [[TMP12]], <4 x float> [[TMP20]]) ; CHECK-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[I13]]) #[[ATTR5:[0-9]+]] @@ -72,12 +69,10 @@ bb2: ; preds = %bb1 %i8 = mul i32 %i, %i4 %i9 = sext i32 %i8 to i64 %i10 = getelementptr i8, ptr addrspace(4) %i3, i64 %i9 - %i11 = load <4 x i32>, ptr addrspace(4) %i10, align 16, !invariant.load !10 br label %bb3 bb3: ; preds = %bb2 - %i12 = load <8 x i32>, ptr addrspace(4) %i7, align 32, !invariant.load !10 - %i13 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i12, <4 x i32> %i11, i32 1, <2 x float> zeroinitializer) + %i13 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i7, ptr addrspace(4) %i3, i32 1, <2 x float> zeroinitializer) call void (...) 
@lgc.create.write.generic.output(<4 x float> %i13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) ret void } @@ -127,3 +122,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc index 03b16464d2..4958e7432e 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest8.lgc @@ -10,7 +10,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -41,8 +41,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I6:%.*]] = mul i32 [[I]], [[PHI]] ; CHECK-NEXT: [[I7:%.*]] = sext i32 [[I6]] to i64 ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] -; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] -; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10:![0-9]+]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 
@llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 @@ -79,9 +79,7 @@ bb3: ; preds = %bb2, %bb1 %i6 = mul i32 %i, %phi %i7 = sext i32 %i6 to i64 %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 - %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 - %i10 = load <8 x i32>, ptr addrspace(4) %i5, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i5, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) call void (...) @lgc.create.write.generic.output(<4 x float> %i11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) ret void } @@ -131,3 +129,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. 
diff --git a/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc b/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc index 97e7f1777c..c188e6376b 100644 --- a/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc +++ b/lgc/test/scalarizationOfDescriptorLoadsTest9.lgc @@ -9,7 +9,7 @@ target triple = "amdgcn--amdpal" ; Function Attrs: nounwind define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spirv.ExecutionModel !8 !lgc.shaderstage !9 { ; CHECK-LABEL: define dllexport spir_func void @lgc.shader.FS.main( -; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel !8 !lgc.shaderstage [[META9:![0-9]+]] { +; CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] !spirv.ExecutionModel [[META8:![0-9]+]] !lgc.shaderstage [[META9:![0-9]+]] { ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> @@ -37,6 +37,8 @@ define dllexport spir_func void @lgc.shader.FS.main() local_unnamed_addr #0 !spi ; CHECK-NEXT: [[I8:%.*]] = getelementptr i8, ptr addrspace(4) [[I2]], i64 [[I7]] ; CHECK-NEXT: [[I9:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10:![0-9]+]] ; CHECK-NEXT: [[I10:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP21:%.*]] = load <8 x i32>, ptr addrspace(4) [[I5]], align 32, !invariant.load [[META10]] +; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, ptr addrspace(4) [[I8]], align 16, !invariant.load [[META10]] ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.waterfall.begin.i32(i32 0, i32 [[I3]]) ; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.waterfall.readfirstlane.i32.i32(i32 [[TMP12]], i32 [[I3]]) ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 @@ -72,7 +74,7 @@ loop: ; preds = %loop, %.entry %i8 = getelementptr i8, ptr addrspace(4) %i2, i64 %i7 %i9 = load <4 x i32>, ptr addrspace(4) %i8, align 16, !invariant.load !10 %i10 = load <8 x i32>, ptr 
addrspace(4) %i5, align 32, !invariant.load !10 - %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, <8 x i32> %i10, <4 x i32> %i9, i32 1, <2 x float> zeroinitializer) + %i11 = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 24, ptr addrspace(4) %i5, ptr addrspace(4) %i8, i32 1, <2 x float> zeroinitializer) call void (...) @lgc.create.write.generic.output(<4 x float> %i11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison) %ind = add i32 %phi.ind, 1 %cond = icmp ne i32 %ind, 1000 @@ -127,3 +129,8 @@ attributes #3 = { nounwind } !8 = !{i32 4} !9 = !{i32 6} !10 = !{} +;. +; CHECK: [[META8]] = !{i32 4} +; CHECK: [[META9]] = !{i32 6} +; CHECK: [[META10]] = !{} +;. diff --git a/lgc/test/tanh.lgc b/lgc/test/tanh.lgc new file mode 100644 index 0000000000..977bc458fb --- /dev/null +++ b/lgc/test/tanh.lgc @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --tool lgc --march amdgcn --version 4 +; RUN: lgc -mcpu=gfx1100 -filetype=asm -o - %s | FileCheck --check-prefixes=CHECK %s + +; ModuleID = 'LLPC module' +source_filename = "LLPC module" +target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" +target triple = "amdgcn--amdpal" + +define float @sample(float %x) !lgc.shaderstage !1 { +; CHECK-LABEL: sample: +; CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mul_f32_e64 v1, |v0|, -2.0 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_mul_f32_e32 v1, 0x3fb8aa3b, v1 +; CHECK-NEXT: v_exp_f32_e32 v1, v1 +; CHECK-NEXT: s_waitcnt_depctr 0xfff +; CHECK-NEXT: v_add_f32_e32 v1, 1.0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; CHECK-NEXT: v_cmp_lt_f32_e64 s[0:1], 0x6f800000, |v1| +; CHECK-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x2f800000, s[0:1] +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_mul_f32_e32 v1, v1, v2 +; CHECK-NEXT: v_rcp_f32_e32 v1, v1 +; CHECK-NEXT: s_waitcnt_depctr 0xfff +; CHECK-NEXT: v_add_f32_e32 v1, v1, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; CHECK-NEXT: v_mul_f32_e32 v1, v2, v1 +; CHECK-NEXT: v_sub_f32_e32 v1, 1.0, v1 +; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) +; CHECK-NEXT: v_bfi_b32 v0, 0x7fffffff, v1, v0 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %y = call float @lgc.create.tanh.f32(float %x) + ret float %y +} + +; Function Attrs: nounwind willreturn memory(read) +declare !lgc.create.opcode !2 i32 @lgc.create.read.builtin.input.i32(...) #0 + +; Function Attrs: nounwind willreturn memory(none) +declare ptr addrspace(7) @lgc.load.buffer.desc(i64, i32, i32, i32) #1 + +; Function Attrs: nounwind memory(none) +declare !lgc.create.opcode !3 float @lgc.create.tanh.f32(...) 
#2 + +attributes #0 = { nounwind willreturn memory(read) } +attributes #1 = { nounwind willreturn memory(none) } +attributes #2 = { nounwind memory(none) } + +!llpc.compute.mode = !{!0} + +!0 = !{i32 8, i32 8, i32 1} +!1 = !{i32 7} +!2 = !{i32 77} +!3 = !{i32 17} diff --git a/llpc/CMakeLists.txt b/llpc/CMakeLists.txt index 16120f7005..277bde621a 100644 --- a/llpc/CMakeLists.txt +++ b/llpc/CMakeLists.txt @@ -205,7 +205,6 @@ if(ICD_BUILD_LLPC) context/llpcGraphicsContext.cpp context/llpcPipelineContext.cpp context/llpcRayTracingContext.cpp - context/GfxRuntimeContext.cpp ) # llpc/lower diff --git a/llpc/context/llpcCompiler.cpp b/llpc/context/llpcCompiler.cpp index 6544139aa1..bc85bd630c 100644 --- a/llpc/context/llpcCompiler.cpp +++ b/llpc/context/llpcCompiler.cpp @@ -64,6 +64,7 @@ #include "lgc/LgcCpsDialect.h" #include "lgc/LgcRtDialect.h" #include "lgc/PassManager.h" +#include "lgc/RuntimeContext.h" #include "llvm-dialects/Dialect/Dialect.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallSet.h" @@ -657,7 +658,7 @@ Result Compiler::BuildShaderModule(const ShaderModuleBuildInfo *shaderInfo, Shad std::vector imageSymbolInfo; std::vector atomicCounterSymbolInfo; std::vector defaultUniformSymbolInfo; - if (shaderInfo->options.pipelineOptions.buildResourcesDataForShaderModule && + if (shaderInfo->options.pipelineOptions.getGlState().buildResourcesDataForShaderModule && moduleData.binType == BinaryType::Spirv) { buildShaderModuleResourceUsage(shaderInfo, resourceNodes, inputSymbolInfo, outputSymbolInfo, uniformBufferInfo, storageBufferInfo, textureSymbolInfo, imageSymbolInfo, atomicCounterSymbolInfo, @@ -700,7 +701,7 @@ Result Compiler::BuildShaderModule(const ShaderModuleBuildInfo *shaderInfo, Shad pShaderModuleData->binCode.pCode = bufferWritePtr; bufferWritePtr += codeBuffer.size() * sizeof(unsigned); - if (shaderInfo->options.pipelineOptions.buildResourcesDataForShaderModule && + if 
(shaderInfo->options.pipelineOptions.getGlState().buildResourcesDataForShaderModule && moduleData.binType == BinaryType::Spirv) { memcpy(bufferWritePtr, &resourceNodes, sizeof(ResourcesNodes)); pResourcesNodes = reinterpret_cast(bufferWritePtr); @@ -763,6 +764,7 @@ static bool getSymbolInfoFromSpvVariable(const SPIRVVariable *spvVar, ResourceNo SPIRVWord varId = 0; BasicType basicType = BasicType::Unknown; symbolInfo->columnCount = 1; + symbolInfo->componentCount = 1; SPIRVWord builtIn = false; bool isBuiltIn = spvVar->hasDecorate(DecorationBuiltIn, 0, &builtIn); @@ -784,6 +786,8 @@ static bool getSymbolInfoFromSpvVariable(const SPIRVVariable *spvVar, ResourceNo } if (varElemTy->getOpCode() == OpTypeMatrix) { symbolInfo->columnCount = varElemTy->getMatrixColumnCount(); + if (varElemTy->getMatrixColumnType()->getOpCode() == OpTypeVector) + symbolInfo->componentCount = varElemTy->getMatrixColumnType()->getVectorComponentCount(); varElemTy = varElemTy->getMatrixColumnType(); } if (varElemTy->getOpCode() == OpTypeVector) @@ -3378,37 +3382,39 @@ void Compiler::adjustRayTracingElf(ElfPackage *pipelineElf, RayTracingContext *r auto &shaderFunctionSection = pipeline.getMap(true)[PalAbi::PipelineMetadataKey::ShaderFunctions].getMap(true); // Get the shader function - auto shaderFunctionName = shaderFunctionSection.begin()->first.getString(); - auto &shaderFunction = shaderFunctionSection.begin()->second.getMap(true); - - // 1. Add raytracing pipeline indirect pipeline metadata - // The metadata is needed for RGP to correctly show different subtype of shaders. 
- // Determine the shader subtype by name - auto subtype = "Unknown"; - if (auto shaderStage = tryGetLgcRtShaderStageFromName(shaderFunctionName)) { - auto stage = shaderStage.value(); - if (stage == lgc::rt::RayTracingShaderStage::RayGeneration) - subtype = "RayGeneration"; - else if (stage == lgc::rt::RayTracingShaderStage::Miss) - subtype = "Miss"; - else if (stage == lgc::rt::RayTracingShaderStage::AnyHit) - subtype = "AnyHit"; - else if (stage == lgc::rt::RayTracingShaderStage::ClosestHit) - subtype = "ClosestHit"; - else if (stage == lgc::rt::RayTracingShaderStage::Intersection) - subtype = "Intersection"; - else if (stage == lgc::rt::RayTracingShaderStage::Callable) - subtype = "Callable"; - else if (stage == lgc::rt::RayTracingShaderStage::Traversal) - subtype = "Traversal"; - } - shaderFunction[".shader_subtype"] = subtype; - - // 2. Apply the .internal_pipeline_hash to .api_shader_hash in .shader_functions section - // NOTE: this is needed for RGP to recognize different shader subtype - auto pipelineHash = pipeline.getMap(true)[PalAbi::PipelineMetadataKey::InternalPipelineHash].getArray(true); - shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[0] = pipelineHash[0]; - shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[1] = pipelineHash[1]; + for (auto &funcSection : shaderFunctionSection) { + auto shaderFunctionName = funcSection.first.getString(); + auto &shaderFunction = funcSection.second.getMap(true); + + // 1. Add raytracing pipeline indirect pipeline metadata + // The metadata is needed for RGP to correctly show different subtype of shaders. 
+ // Determine the shader subtype by name + auto subtype = "Unknown"; + if (auto shaderStage = tryGetLgcRtShaderStageFromName(shaderFunctionName)) { + auto stage = shaderStage.value(); + if (stage == lgc::rt::RayTracingShaderStage::RayGeneration) + subtype = "RayGeneration"; + else if (stage == lgc::rt::RayTracingShaderStage::Miss) + subtype = "Miss"; + else if (stage == lgc::rt::RayTracingShaderStage::AnyHit) + subtype = "AnyHit"; + else if (stage == lgc::rt::RayTracingShaderStage::ClosestHit) + subtype = "ClosestHit"; + else if (stage == lgc::rt::RayTracingShaderStage::Intersection) + subtype = "Intersection"; + else if (stage == lgc::rt::RayTracingShaderStage::Callable) + subtype = "Callable"; + else if (stage == lgc::rt::RayTracingShaderStage::Traversal) + subtype = "Traversal"; + } + shaderFunction[".shader_subtype"] = subtype; + + // 2. Apply the .internal_pipeline_hash to .api_shader_hash in .shader_functions section + // NOTE: this is needed for RGP to recognize different shader subtype + auto pipelineHash = pipeline.getMap(true)[PalAbi::PipelineMetadataKey::InternalPipelineHash].getArray(true); + shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[0] = pipelineHash[0]; + shaderFunction[PalAbi::ShaderMetadataKey::ApiShaderHash].getArray(true)[1] = pipelineHash[1]; + } // Write modified metadata to the pipeline ELF ElfNote newMetaNote = metaNote; @@ -3590,6 +3596,7 @@ void Compiler::buildShaderCacheHash(Context *context, unsigned stageMask, ArrayR auto pipelineInfo = reinterpret_cast(context->getPipelineBuildInfo()); auto pipelineOptions = pipelineContext->getPipelineOptions(); + ShaderStage preStage = ShaderStageInvalid; // Build hash per shader stage for (ShaderStage stage : gfxShaderStages()) { if ((stageMask & getLgcShaderStageMask(stage)) == 0) @@ -3619,10 +3626,21 @@ void Compiler::buildShaderCacheHash(Context *context, unsigned stageMask, ArrayR // Add per stage hash code to fragmentHasher or nonFragmentHasher per shader stage auto 
shaderHashCode = MetroHash::compact64(&hash); - if (stage == ShaderStageFragment) + if (stage == ShaderStageFragment) { fragmentHasher.Update(shaderHashCode); - else + const ShaderModuleData *moduleData = reinterpret_cast(shaderInfo->pModuleData); + if (moduleData && moduleData->usage.useBarycentric) { + // If fragment uses barycentrics, we still need to care about the previous stage, because the primitive type + // might be specified there. + if ((preStage != ShaderStageInvalid) && (preStage != ShaderStageVertex)) { + auto preShaderInfo = pipelineContext->getPipelineShaderInfo(preStage); + moduleData = reinterpret_cast(preShaderInfo->pModuleData); + fragmentHasher.Update(moduleData->cacheHash); + } + } + } else nonFragmentHasher.Update(shaderHashCode); + preStage = stage; } // Add additional pipeline state to final hasher diff --git a/llpc/context/llpcContext.cpp b/llpc/context/llpcContext.cpp index 58b1d25c6b..b148ed4ef1 100644 --- a/llpc/context/llpcContext.cpp +++ b/llpc/context/llpcContext.cpp @@ -29,7 +29,6 @@ *********************************************************************************************************************** */ #include "llpcContext.h" -#include "GfxRuntimeContext.h" #include "LowerAdvancedBlend.h" #include "ProcessGfxRuntimeLibrary.h" #include "SPIRVInternal.h" @@ -55,6 +54,7 @@ #include "lgc/LgcDialect.h" #include "lgc/LgcRtDialect.h" #include "lgc/PassManager.h" +#include "lgc/RuntimeContext.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitstream/BitstreamReader.h" @@ -71,6 +71,10 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/Cloning.h" #define DEBUG_TYPE "llpc-context" @@ -241,6 +245,8 @@ void 
Context::ensureGpurtLibrary() { ShaderModuleData moduleData = {}; moduleData.binCode = rtState->gpurtShaderLibrary; + if (moduleData.binCode.codeSize == 0) + report_fatal_error("No GPURT library available"); moduleData.binType = BinaryType::Spirv; moduleData.usage.keepUnusedFunctions = true; moduleData.usage.rayQueryLibrary = true; @@ -277,6 +283,18 @@ void Context::ensureGpurtLibrary() { lowerPassMgr->addPass(AlwaysInlinerPass()); lowerPassMgr->addPass(SpirvLowerAccessChain()); lowerPassMgr->addPass(SpirvLowerGlobal()); + + // Run some basic optimization to simplify the code. This should be more efficient than optimizing them after they are + // inlined into the caller. + FunctionPassManager fpm; + fpm.addPass(SROAPass(SROAOptions::ModifyCFG)); + fpm.addPass(InstSimplifyPass()); + fpm.addPass(SimplifyCFGPass()); + // DCE is particularly useful for removing dead instructions after continuation call, which may help reducing + // continuation stack size. + fpm.addPass(ADCEPass()); + lowerPassMgr->addPass(createModuleToFunctionPassAdaptor(std::move(fpm))); + timerProfiler.addTimerStartStopPass(*lowerPassMgr, TimerTranslate, false); lowerPassMgr->run(*gpurt); diff --git a/llpc/context/llpcPipelineContext.cpp b/llpc/context/llpcPipelineContext.cpp index 808cf24cdd..5338cffcd0 100644 --- a/llpc/context/llpcPipelineContext.cpp +++ b/llpc/context/llpcPipelineContext.cpp @@ -311,10 +311,10 @@ Options PipelineContext::computePipelineOptions() const { // Driver report full subgroup lanes for compute shader, here we just set fullSubgroups as default options options.fullSubgroups = true; options.internalRtShaders = getPipelineOptions()->internalRtShaders; - options.disableSampleMask = getPipelineOptions()->disableSampleMask; - options.disableTruncCoordForGather = getPipelineOptions()->disableTruncCoordForGather; + options.disableSampleMask = getPipelineOptions()->getGlState().disableSampleMask; + options.disableTruncCoordForGather = 
getPipelineOptions()->getGlState().disableTruncCoordForGather; options.enablePrimGeneratedQuery = getPipelineOptions()->enablePrimGeneratedQuery; - options.enableFragColor = getPipelineOptions()->enableFragColor; + options.enableFragColor = getPipelineOptions()->getGlState().enableFragColor; options.rtBoxSortHeuristicMode = m_rtState.boxSortHeuristicMode; options.rtStaticPipelineFlags = m_rtState.staticPipelineFlags; @@ -481,7 +481,7 @@ void PipelineContext::convertResourceNode(ResourceNode &dst, const ResourceMappi else dst.concreteType = static_cast(src.type); - if (getPipelineOptions()->replaceSetWithResourceType && src.srdRange.set == 0) { + if (getPipelineOptions()->getGlState().replaceSetWithResourceType && src.srdRange.set == 0) { // Special value InternalDescriptorSetId(-1) will be passed in for internal usage dst.set = getGlResourceNodeSetFromType(src.type); } else { diff --git a/llpc/docs/DdnBindlessTexture.md b/llpc/docs/DdnBindlessTexture.md index da974d90bf..30f506efde 100644 --- a/llpc/docs/DdnBindlessTexture.md +++ b/llpc/docs/DdnBindlessTexture.md @@ -250,7 +250,7 @@ If a bindless texture is declared as uvec2, it behaves identically to a normal The ARB_bindless_texture extension was published in 2013, when we implemented this extension in OGLP driver there was no SPIR-V opcode or extension support it, so we had to add two flags to indicate whether the bindless texture/image are used in the program, we can get this state from glslang, when one texture/image in a shader is declared as bindless, all the textures/images in the given program will be handled as bindless mode, which can simplify our driver’s implementation, so in LLPC’s implementation we will continue to follow this way. 
-Two pipeline options are added to indicate whether the bindless texture or image is used, these flags are set at program link-time, so that when Llpc::Compiler::buildShaderModuleResourceUsage() is called, the texture variables can be recognized as its real type variables (eg. if declared as `layout(bindless_sampler) uniform sampler2D s1;`, it will be recognized as a 64bit uint typed default uniform variable, instead of a texture), so that we can create the correct resourceMappingNode table for each kind of resource. And these two flags will also be checked at pipeline compile-time, so that we can generate the correct LLVM IR for bindless texture. +Two pipeline options are added to indicate whether the bindless texture or image is used, these two flags will be checked at pipeline compile-time, so that we can generate the correct LLVM IR for bindless texture. ``` c++ struct PipelineOptions { @@ -314,7 +314,7 @@ If declare a bindless texture handle as samplerXX type, it will be a `OpTypeSamp - At program link-time, when calling `Llpc::Compiler::buildShaderModuleResourceUsage()`, we need to recognize `OpTypeSampledImage` type variable as a 64-bit unsigned integer typed default uniform, so that we will not generate resource mapping node for texture, but generate a default uniform instead; - At pipeline compile time, we only need to add two patches in spirvReader: - 1). When calls `SPIRVToLLVM::transVariable()` to translate variable `%13`, we need to force to change the variable type from `OpTypedSapledImage` to int64, so that we can generate a uniform variable’s declaration, and we can handle `OpLoad` instruction correctly; + 1). 
When calling `SPIRVToLLVM::transVariable()` to translate variable `%13`, we need to forcibly change the variable type from `OpTypedSampledImage` to int64, so that we can generate a uniform variable’s declaration; ``` %11 = OpTypeSampledImage %10 @@ -329,19 +329,16 @@ If declare a bindless texture handle as samplerXX type, it will be a `OpTypeSamp %18 = OpLoad %15 %17 %19 = OpImageSampleImplicitLod %7 %14 %18 ``` - 2). When calling `SPIRVToLLVM::transValueWithOpcode()` to load the bindless texture handle, we need to do two things: - i). Load 64-bit image descriptor address, then convert it to an int pointer with correct address space; + 2). When calling `SPIRVToLLVM::transValueWithOpcode()` to load the bindless texture handle, we need to load the imageDescPointer by the bindless handle; +The above solution works for the simple cases, but in the real implementation, we found if the texture is declared as an array, multi-dimensional array, or declared as a struct member or block member, it is hard to handle the accessChain instruction, especially when translating the type of a bindless texture to a 64-bit unsigned integer. To handle the aggregate data types, we provided a new solution in Spirv-Builder: +1). Convert the OpTypeSampledImage typed variable to a uvec2 type variable; +2). Before the texture function is called, insert a bitcast opCode to convert the uvec2 type handle to a sampler type variable;
- -![](./DdnBindlessTexturePipelineDumpDeclSamplerType.PNG) +The above solution can significantly simplify the implementation in LLPC, after this change, we don't need to convert the data types at program link-time, we don't need to change the variable's type when calling SPIRVToLLVM::transVariable(), and we don't need to do any change to handle the accessChain instructions for the aggregate types, the bindless handle will be treated just as a uvec2 type variable, and handling the case that declares a bindless texture by a samplerXX type variable would be exactly the same as one that declares a bindless texture by a uvec2 type. #### 2. Declare bindless texture handle as uvec2 type -If declare a bindless texture as uniform uvec2 type, the solution would be much easier, we don’t need to change the variable’s data type at program link-time or when `SPIRVToLLVM::transVariable()` is called, an `OpBitcast` instruction was added by SPIR-V builder to convert a 64-bit handle to a sampler, which need to handle specially for bindless texture. As the bindless handle is a native 64-bit data type, so the result of this instruction `%14 = OpLoad %11 %13` is a 64-bit texture handle, when translate the following instruction -`%17 = OpBitcast %16 %14`, we need to do the same thing as above case(declared the handle by sampler2D): - +If declare a bindless texture as uniform uvec2 type, the solution would be much easier, an `OpBitcast` instruction was added by SPIR-V builder to convert a 64-bit handle to a sampler, which needs to be handled specially for bindless texture.
As the bindless handle is a native 64-bit data type, so the result of this instruction `%14 = OpLoad %11 %13` is a 64-bit texture handle, when translate the following instruction +`%17 = OpBitcast %16 %14` - Load 64-bit image descriptor address, then convert it to an int pointer with correct address space; - Obtain the each descriptor’s pointer after image descriptor address is loaded, then insert all descriptors in the structure; diff --git a/llpc/docs/DdnBindlessTexturePipelineDumpDeclSamplerType.PNG b/llpc/docs/DdnBindlessTexturePipelineDumpDeclSamplerType.PNG deleted file mode 100644 index eeb1a2a4b5..0000000000 Binary files a/llpc/docs/DdnBindlessTexturePipelineDumpDeclSamplerType.PNG and /dev/null differ diff --git a/llpc/docs/DdnBindlessTexturePipelineDumpDeclUvec2Type.PNG b/llpc/docs/DdnBindlessTexturePipelineDumpDeclUvec2Type.PNG index 5c2eef2925..e3c868e0a3 100644 Binary files a/llpc/docs/DdnBindlessTexturePipelineDumpDeclUvec2Type.PNG and b/llpc/docs/DdnBindlessTexturePipelineDumpDeclUvec2Type.PNG differ diff --git a/llpc/lower/LowerAdvancedBlend.cpp b/llpc/lower/LowerAdvancedBlend.cpp index e06f675fe1..6e937329dc 100644 --- a/llpc/lower/LowerAdvancedBlend.cpp +++ b/llpc/lower/LowerAdvancedBlend.cpp @@ -29,13 +29,13 @@ *********************************************************************************************************************** */ #include "LowerAdvancedBlend.h" -#include "GfxRuntimeContext.h" #include "SPIRVInternal.h" #include "compilerutils/CompilerUtils.h" #include "llpcContext.h" #include "llpcSpirvLowerInternalLibraryIntrinsicUtil.h" #include "vkgcDefs.h" #include "lgc/Builder.h" +#include "lgc/RuntimeContext.h" #define DEBUG_TYPE "Lower-advanced-blend" @@ -85,30 +85,23 @@ void LowerAdvancedBlend::processFsOutputs(Module &module) { if (global.getType()->getAddressSpace() == SPIRAS_Uniform && global.getName().ends_with(AdvancedBlendIsMsaaName)) isMsaaUniform = &global; } - // Prepare arguments of AmdAdvancedBlend(inColor, imageDescMsLow, 
imageDescMsHigh, imageDescLow, imageDescHigh, - // fmaskDescLow, fmaskDescHigh, mode, isMsaa) from shaderLibrary + // Prepare arguments of AmdAdvancedBlend(inColor, imageDescMs, imageDesc, fmaskDesc, mode, isMsaa) from shaderLibrary m_builder->SetInsertPointPastAllocas(m_entryPoint); // Get the parameters and store them into the allocated parameter points - Type *descType = FixedVectorType::get(m_builder->getInt32Ty(), 8); unsigned bindings[2] = {m_binding, m_binding + 1}; - Value *imageDescLow[2] = {}; - Value *imageDescHigh[2] = {}; + Value *imageDesc[2] = {}; for (unsigned id = 0; id < 2; ++id) { unsigned descSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource); - Value *imageDescPtr = m_builder->CreateGetDescPtr(ResourceNodeType::DescriptorResource, - ResourceNodeType::DescriptorResource, descSet, bindings[id]); - Value *imageDesc = m_builder->CreateLoad(descType, imageDescPtr); - imageDescLow[id] = m_builder->CreateShuffleVector(imageDesc, ArrayRef{0, 1, 2, 3}); - imageDescHigh[id] = m_builder->CreateShuffleVector(imageDesc, ArrayRef{4, 5, 6, 7}); + imageDesc[id] = m_builder->CreateGetDescPtr(ResourceNodeType::DescriptorResource, + ResourceNodeType::DescriptorResource, descSet, bindings[id]); + imageDesc[id] = m_builder->CreatePtrToInt(imageDesc[id], m_builder->getInt64Ty()); } unsigned descSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorFmask); - Value *fmaskDescPtr = m_builder->CreateGetDescPtr(ResourceNodeType::DescriptorFmask, - ResourceNodeType::DescriptorFmask, descSet, m_binding); - Value *fmaskDesc = m_builder->CreateLoad(descType, fmaskDescPtr); - Value *fmaskDescLow = m_builder->CreateShuffleVector(fmaskDesc, ArrayRef{0, 1, 2, 3}); - Value *fmaskDescHigh = m_builder->CreateShuffleVector(fmaskDesc, ArrayRef{4, 5, 6, 7}); + Value *fmaskDesc = m_builder->CreateGetDescPtr(ResourceNodeType::DescriptorFmask, ResourceNodeType::DescriptorFmask, + descSet, 
m_binding); + fmaskDesc = m_builder->CreatePtrToInt(fmaskDesc, m_builder->getInt64Ty()); assert(modeUniform && isMsaaUniform); modeUniform = m_builder->CreateLoad(m_builder->getInt32Ty(), modeUniform); @@ -132,8 +125,7 @@ void LowerAdvancedBlend::processFsOutputs(Module &module) { Value *blendColor = inliner .inlineCall(*m_builder, advancedBlendFunc, - {srcVal, imageDescLow[0], imageDescHigh[0], imageDescLow[1], imageDescHigh[1], - fmaskDescLow, fmaskDescHigh, modeUniform, isMsaaUniform}) + {srcVal, imageDesc[0], imageDesc[1], fmaskDesc, modeUniform, isMsaaUniform}) .returnValue; storeInst->setOperand(0, blendColor); diff --git a/llpc/lower/LowerGLCompatibility.cpp b/llpc/lower/LowerGLCompatibility.cpp index 62c82a0697..aeae5b78fe 100644 --- a/llpc/lower/LowerGLCompatibility.cpp +++ b/llpc/lower/LowerGLCompatibility.cpp @@ -773,19 +773,16 @@ void LowerGLCompatibility::emulateDrawPixels() { auto vec2Type = FixedVectorType::get(floatType, 2); auto vec4Type = FixedVectorType::get(floatType, 4); auto ivec2Type = FixedVectorType::get(int32Type, 2); - auto ivec8Type = FixedVectorType::get(int32Type, 8); if (m_patchTexCoord == nullptr) { createPatchTexCoord(); } Value *patchTexcoord = m_builder->CreateLoad(vec2Type, m_patchTexCoord); Value *texcoord = m_builder->CreateFPToUI(patchTexcoord, ivec2Type); - auto imageDesc = m_builder->CreateGetDescPtr( + auto imageDescPtr = m_builder->CreateGetDescPtr( lgc::ResourceNodeType::DescriptorResource, lgc::ResourceNodeType::DescriptorResource, PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource), Vkgc::InternalBinding::PixelOpInternalBinding); - auto descriptor = m_builder->CreateLoad(ivec8Type, imageDesc); - descriptor->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(*m_context, {})); - Value *texel = m_builder->CreateImageLoad(vec4Type, Dim2D, 0, descriptor, texcoord, nullptr); + Value *texel = m_builder->CreateImageLoad(vec4Type, Dim2D, 0, imageDescPtr, texcoord, nullptr); // 
Write Color if (buildInfo->glState.drawPixelsType == Vkgc::DrawPixelsTypeColor) { @@ -868,7 +865,6 @@ void LowerGLCompatibility::emulateBitmap() { auto int32Type = m_builder->getInt32Ty(); auto vec2Type = FixedVectorType::get(floatType, 2); auto ivec2Type = FixedVectorType::get(int32Type, 2); - auto ivec8Type = FixedVectorType::get(int32Type, 8); if (!m_patchTexCoord) { createPatchTexCoord(); } @@ -882,13 +878,11 @@ void LowerGLCompatibility::emulateBitmap() { } mask = m_builder->CreateShl(ConstantInt::get(ivec2Type, 1), mask); Value *texCoordSrc = m_builder->CreateLShr(constInt0x3, texcoord); - auto imageDesc = m_builder->CreateGetDescPtr( + auto imageDescPtr = m_builder->CreateGetDescPtr( lgc::ResourceNodeType::DescriptorResource, lgc::ResourceNodeType::DescriptorResource, PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorResource), Vkgc::InternalBinding::PixelOpInternalBinding); - auto descriptor = m_builder->CreateLoad(ivec8Type, imageDesc); - descriptor->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(*m_context, {})); - Value *texel = m_builder->CreateImageLoad(ivec2Type, Dim2D, 0, descriptor, texCoordSrc, nullptr); + Value *texel = m_builder->CreateImageLoad(ivec2Type, Dim2D, 0, imageDescPtr, texCoordSrc, nullptr); Value *val = m_builder->CreateAnd(mask, texel); val = m_builder->CreateExtractElement(val, ConstantInt::get(int32Type, 0)); auto cmp = m_builder->CreateICmpEQ(val, ConstantInt::get(int32Type, 0)); diff --git a/llpc/lower/ProcessGfxRuntimeLibrary.cpp b/llpc/lower/ProcessGfxRuntimeLibrary.cpp index 378a8e749e..3dac36e0a2 100644 --- a/llpc/lower/ProcessGfxRuntimeLibrary.cpp +++ b/llpc/lower/ProcessGfxRuntimeLibrary.cpp @@ -113,37 +113,35 @@ void ProcessGfxRuntimeLibrary::processLibraryFunction(Function *&func) { // ===================================================================================================================== // Create texel load void ProcessGfxRuntimeLibrary::createTexelLoad(Function 
*func) { - // Arguments: imageDescLow, imageDescHigh, icoord, lod - constexpr unsigned argCount = 4; - Type *int4Ty = FixedVectorType::get(m_builder->getInt32Ty(), 4); + // Arguments: imageDesc, icoord, lod + constexpr unsigned argCount = 3; Type *int2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); - Type *argTypes[] = {int4Ty, int4Ty, int2Ty, m_builder->getInt32Ty()}; + Type *argTypes[] = {m_builder->getInt64Ty(), int2Ty, m_builder->getInt32Ty()}; std::array loadArgs; for (unsigned i = 0; i < argCount; ++i) loadArgs[i] = m_builder->CreateLoad(argTypes[i], func->getArg(i)); unsigned imageFlag = Builder::ImageFlagInvariant | Builder::ImageFlagNotAliased; - auto imageDesc = m_builder->CreateShuffleVector(loadArgs[0], loadArgs[1], ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); - auto imageLoad = - m_builder->CreateImageLoad(func->getReturnType(), Builder::Dim2D, imageFlag, imageDesc, loadArgs[2], loadArgs[3]); + loadArgs[0] = m_builder->CreateIntToPtr(loadArgs[0], PointerType::get(m_builder->getContext(), ADDR_SPACE_CONST)); + auto imageLoad = m_builder->CreateImageLoad(func->getReturnType(), Builder::Dim2D, imageFlag, loadArgs[0], + loadArgs[1], loadArgs[2]); m_builder->CreateRet(imageLoad); } // ===================================================================================================================== // Create texel load with fmask void ProcessGfxRuntimeLibrary::createTexelLoadFmask(Function *func) { - // Argument: imageDescLow, imageDescHigh, fmaskDescLow, fmaskDescHigh, icoord, lod - constexpr unsigned argCount = 6; - Type *int4Ty = FixedVectorType::get(m_builder->getInt32Ty(), 4); + // Argument: imageDescMs, fmaskDesc, icoord, lod + constexpr unsigned argCount = 4; Type *int2Ty = FixedVectorType::get(m_builder->getInt32Ty(), 2); - Type *argTypes[] = {int4Ty, int4Ty, int4Ty, int4Ty, int2Ty, m_builder->getInt32Ty()}; + Type *argTypes[] = {m_builder->getInt64Ty(), m_builder->getInt64Ty(), int2Ty, m_builder->getInt32Ty()}; std::array loadArgs; for (unsigned i = 
0; i < argCount; ++i) loadArgs[i] = m_builder->CreateLoad(argTypes[i], func->getArg(i)); unsigned imageFlag = Builder::ImageFlagInvariant | Builder::ImageFlagNotAliased; - auto imageDesc = m_builder->CreateShuffleVector(loadArgs[0], loadArgs[1], ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); - auto fmaskDesc = m_builder->CreateShuffleVector(loadArgs[2], loadArgs[3], ArrayRef{0, 1, 2, 3, 4, 5, 6, 7}); - auto imageLoad = m_builder->CreateImageLoadWithFmask(func->getReturnType(), Builder::Dim2DMsaa, imageFlag, imageDesc, - fmaskDesc, loadArgs[4], loadArgs[5]); + loadArgs[0] = m_builder->CreateIntToPtr(loadArgs[0], PointerType::get(m_builder->getContext(), ADDR_SPACE_CONST)); + loadArgs[1] = m_builder->CreateIntToPtr(loadArgs[1], PointerType::get(m_builder->getContext(), ADDR_SPACE_CONST)); + auto imageLoad = m_builder->CreateImageLoadWithFmask(func->getReturnType(), Builder::Dim2DMsaa, imageFlag, + loadArgs[0], loadArgs[1], loadArgs[2], loadArgs[3]); m_builder->CreateRet(imageLoad); } diff --git a/llpc/lower/llpcSpirvLowerGlobal.cpp b/llpc/lower/llpcSpirvLowerGlobal.cpp index ef982d0525..bc3233c22d 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.cpp +++ b/llpc/lower/llpcSpirvLowerGlobal.cpp @@ -30,6 +30,7 @@ */ #include "llpcSpirvLowerGlobal.h" #include "SPIRVInternal.h" +#include "compilerutils/CompilerUtils.h" #include "continuations/ContinuationsUtil.h" #include "llpcContext.h" #include "llpcDebug.h" @@ -188,8 +189,7 @@ static_assert(lgc::ShadingRateHorizontal4Pixels == "Shading rate flag mismatch"); // ===================================================================================================================== -SpirvLowerGlobal::SpirvLowerGlobal() - : m_lowerInputInPlace(false), m_lowerOutputInPlace(false), m_lastVertexProcessingStage(ShaderStageInvalid) { +SpirvLowerGlobal::SpirvLowerGlobal() : m_lastVertexProcessingStage(ShaderStageInvalid) { } // ===================================================================================================================== @@ 
-204,8 +204,39 @@ PreservedAnalyses SpirvLowerGlobal::run(Module &module, ModuleAnalysisManager &a changeRtFunctionSignature(); + // Special handling of explicit interpolation (InterpolateAt* instructions) in fragment shaders -- get those out of + // the way. + if (m_shaderStage == ShaderStageFragment) + handleCallInst(false, true); + + // Preparations for output lowering + m_unifiedReturn = nullptr; + + if (m_shaderStage == ShaderStageGeometry) { + // Collect "emit" calls + handleCallInst(true, false); + } else if (m_shaderStage < ShaderStageGfxCount) { + ensureUnifiedReturn(); + } + + // Preparations for XFB handling + auto shaderStageMask = m_context->getShaderStageMask(); + m_lastVertexProcessingStage = ShaderStageInvalid; + + if (m_shaderStage < ShaderStageFragment) { + if (shaderStageMask & ShaderStageGeometryBit) + m_lastVertexProcessingStage = ShaderStageGeometry; + else if (shaderStageMask & ShaderStageTessEvalBit) + m_lastVertexProcessingStage = ShaderStageTessEval; + else if (shaderStageMask & ShaderStageVertexBit) + m_lastVertexProcessingStage = ShaderStageVertex; + + if (m_shaderStage == m_lastVertexProcessingStage) + buildApiXfbMap(); + } + // First pass over globals - for (GlobalVariable &global : m_module->globals()) { + for (GlobalVariable &global : llvm::make_early_inc_range(m_module->globals())) { auto addrSpace = global.getType()->getAddressSpace(); if (addrSpace == SPIRAS_Private || addrSpace == SPIRAS_Input || addrSpace == SPIRAS_Output) { @@ -213,12 +244,11 @@ PreservedAnalyses SpirvLowerGlobal::run(Module &module, ModuleAnalysisManager &a // used yet for inputs/outputs.) 
convertUsersOfConstantsToInstructions(&global); - if (addrSpace == SPIRAS_Private) + if (addrSpace == SPIRAS_Private) { mapGlobalVariableToProxy(&global); - else if (addrSpace == SPIRAS_Input) - mapInputToProxy(&global); - else if (addrSpace == SPIRAS_Output) - mapOutputToProxy(&global); + } else { + lowerInOut(&global); + } } else if (addrSpace == SPIRAS_Local) { // Prefix all LDS variables to avoid downstream conflicts when linking shaders together if (global.hasName()) { @@ -227,30 +257,17 @@ PreservedAnalyses SpirvLowerGlobal::run(Module &module, ModuleAnalysisManager &a } } - // Remove global variables that were already fully replaced - for (auto globalVar : m_globalsToErase) { - globalVar->dropAllReferences(); - globalVar->eraseFromParent(); - } - m_globalsToErase.clear(); - - // Do lowering operations - if (m_lowerInputInPlace && m_lowerOutputInPlace) { - // Both input and output have to be lowered in-place (without proxy variables) - lowerInOutInPlace(); // Just one lowering operation is sufficient - } else { - // Either input or output has to be lowered in-place, not both - if (m_lowerInputInPlace) - lowerInOutInPlace(); - else - lowerInput(); - - if (m_lowerOutputInPlace) - lowerInOutInPlace(); - else - lowerOutput(); + // Now that outputs have been lowered, replace the Emit(Stream)Vertex calls with builder code. + for (auto emitCall : m_emitCalls) { + unsigned emitStreamId = + emitCall->arg_size() != 0 ? cast(emitCall->getArgOperand(0))->getZExtValue() : 0; + m_builder->SetInsertPoint(emitCall); + m_builder->CreateEmitVertex(emitStreamId); + emitCall->eraseFromParent(); } + m_emitCalls.clear(); + // Do further lowering operations if (m_shaderStage == ShaderStageVertex) lowerEdgeFlag(); @@ -294,8 +311,8 @@ void SpirvLowerGlobal::lowerEdgeFlag() { } // ===================================================================================================================== -// Handle "return" instructions. 
-ReturnInst *SpirvLowerGlobal::ensureUnifiedReturn() { +// Ensure that there is exactly one "ret" instruction. This is used for writing output variables for many shader types. +void SpirvLowerGlobal::ensureUnifiedReturn() { SmallVector retInsts; for (BasicBlock &block : *m_entryPoint) { @@ -303,8 +320,10 @@ ReturnInst *SpirvLowerGlobal::ensureUnifiedReturn() { retInsts.push_back(retInst); } - if (retInsts.size() == 1) - return retInsts[0]; + if (retInsts.size() == 1) { + m_unifiedReturn = retInsts[0]; + return; + } // There are more than 2 returns; create a unified return block. // @@ -319,7 +338,7 @@ ReturnInst *SpirvLowerGlobal::ensureUnifiedReturn() { } m_builder->SetInsertPoint(retBlock); - return m_builder->CreateRetVoid(); + m_unifiedReturn = m_builder->CreateRetVoid(); } // ===================================================================================================================== @@ -335,7 +354,7 @@ void SpirvLowerGlobal::handleCallInst(bool checkEmitCall, bool checkInterpCall) // We get all users before iterating because the iterator can be invalidated // by interpolateInputElement SmallVector users(function.users()); - for (User *user : users) { + for (User *user : make_early_inc_range(users)) { assert(isa(user) && "We should only have CallInst instructions here."); CallInst *callInst = cast(user); if (checkEmitCall) { @@ -348,6 +367,8 @@ void SpirvLowerGlobal::handleCallInst(bool checkEmitCall, bool checkInterpCall) mangledName.starts_with(gSPIRVName::InterpolateAtSample) || mangledName.starts_with(gSPIRVName::InterpolateAtOffset) || mangledName.starts_with(gSPIRVName::InterpolateAtVertexAMD)) { + m_builder->SetInsertPoint(callInst); + // Translate interpolation functions to LLPC intrinsic calls auto loadSrc = callInst->getArgOperand(0); unsigned interpLoc = InterpLocUnknown; @@ -375,7 +396,7 @@ void SpirvLowerGlobal::handleCallInst(bool checkEmitCall, bool checkInterpCall) GlobalVariable *gv = nullptr; SmallVector indexOperands; - if (auto 
getElemPtr = dyn_cast(loadSrc)) { + if (auto getElemPtr = dyn_cast(loadSrc)) { // The interpolant is an element of the input for (auto &index : getElemPtr->indices()) indexOperands.push_back(m_builder->CreateZExtOrTrunc(index, m_builder->getInt32Ty())); @@ -383,7 +404,9 @@ void SpirvLowerGlobal::handleCallInst(bool checkEmitCall, bool checkInterpCall) } else { gv = cast(loadSrc); } - interpolateInputElement(interpLoc, auxInterpValue, *callInst, gv, indexOperands); + Value *result = interpolateInputElement(callInst->getType(), interpLoc, auxInterpValue, gv, indexOperands); + callInst->replaceAllUsesWith(result); + callInst->eraseFromParent(); } } } @@ -430,162 +453,6 @@ static bool hasPrimitiveIdx(const Constant &metaVal) { return static_cast(inOutMeta.PerPrimitive); } -// ===================================================================================================================== -// Handle a single "load" instruction loading a global. -// -// @param inOut : Global Variable instruction -// @param indexOperands : Indices of GEP instruction -// @param loadInst : Load instruction -void SpirvLowerGlobal::handleLoadInstGEP(GlobalVariable *inOut, ArrayRef indexOperands, LoadInst &loadInst) { - - assert((indexOperands.empty() || cast(indexOperands.front())->isZero()) && "Non-zero GEP first index\n"); - if (!indexOperands.empty()) - indexOperands = indexOperands.drop_front(); - - m_builder->SetInsertPoint(&loadInst); - - Value *vertexIdx = nullptr; - auto inOutTy = inOut->getValueType(); - - auto addrSpace = inOut->getType()->getPointerAddressSpace(); - - MDNode *metaNode = inOut->getMetadata(gSPIRVMD::InOut); - assert(metaNode); - auto inOutMetaVal = mdconst::dyn_extract(metaNode->getOperand(0)); - - // If the input/output is arrayed, the outermost index might be used for vertex indexing - if (inOutTy->isArrayTy() && hasVertexIdx(*inOutMetaVal)) { - if (!indexOperands.empty()) { - vertexIdx = indexOperands.front(); - indexOperands = indexOperands.drop_front(); - } 
else if (inOutTy != loadInst.getType()) { - vertexIdx = m_builder->getInt32(0); - } - inOutTy = inOutTy->getArrayElementType(); - inOutMetaVal = cast(inOutMetaVal->getOperand(1)); - } - - Value *loadValue = loadInOutMember(inOutTy, loadInst.getType(), addrSpace, indexOperands, 0, inOutMetaVal, nullptr, - vertexIdx, InterpLocUnknown, nullptr, false); - - m_loadInsts.insert(&loadInst); - loadInst.replaceAllUsesWith(loadValue); -} - -// ===================================================================================================================== -// Handle "load" instructions. -void SpirvLowerGlobal::handleLoadInst() { - auto shouldHandle = [&](const unsigned addrSpace) { - if (addrSpace != SPIRAS_Input && addrSpace != SPIRAS_Output) - return false; - // Skip if "load" instructions are not expected to be handled - const bool isTcsInput = (m_shaderStage == ShaderStageTessControl && addrSpace == SPIRAS_Input); - const bool isTcsOutput = (m_shaderStage == ShaderStageTessControl && addrSpace == SPIRAS_Output); - const bool isTesInput = (m_shaderStage == ShaderStageTessEval && addrSpace == SPIRAS_Input); - const bool isMeshInput = (m_shaderStage == ShaderStageMesh && addrSpace == SPIRAS_Input); - - return isTcsInput || isTcsOutput || isTesInput || isMeshInput; - }; - - for (GlobalVariable &global : m_module->globals()) { - const unsigned addrSpace = global.getType()->getPointerAddressSpace(); - if (!shouldHandle(addrSpace)) - continue; - for (User *user : global.users()) { - if (LoadInst *loadInst = dyn_cast(user)) { - handleLoadInstGEP(&global, {}, *loadInst); - } else if (GetElementPtrInst *gep = dyn_cast(user)) { - // The user is a GEP - // We look for load instructions in the GEP users - for (User *gepUser : gep->users()) { - // We shouldn't have any chained GEPs here, they are coalesced by the LowerAccessChain pass. 
- assert(!isa(gepUser)); - if (LoadInst *loadInst = dyn_cast(gepUser)) { - SmallVector indexOperands; - for (auto &index : gep->indices()) - indexOperands.push_back(m_builder->CreateZExtOrTrunc(index, m_builder->getInt32Ty())); - handleLoadInstGEP(&global, indexOperands, *loadInst); - } - } - } - } - } -} - -// ===================================================================================================================== -// Handle a single "store" instruction storing a global. -// -// @param output : Global Variable instruction -// @param indexOperands : Indices of GEP instruction -// @param storeInst : Store instruction -void SpirvLowerGlobal::handleStoreInstGEP(GlobalVariable *output, ArrayRef indexOperands, - StoreInst &storeInst) { - assert((indexOperands.empty() || cast(indexOperands.front())->isZero()) && "Non-zero GEP first index\n"); - // drop first element - if (!indexOperands.empty()) - indexOperands = indexOperands.drop_front(); - - m_builder->SetInsertPoint(&storeInst); - - Value *storeValue = storeInst.getOperand(0); - Value *vertexOrPrimitiveIdx = nullptr; - auto outputTy = output->getValueType(); - - MDNode *metaNode = output->getMetadata(gSPIRVMD::InOut); - assert(metaNode); - auto outputMetaVal = mdconst::dyn_extract(metaNode->getOperand(0)); - // If the output is arrayed, the outermost index might be used for vertex or primitive indexing - if (outputTy->isArrayTy() && (hasVertexIdx(*outputMetaVal) || hasPrimitiveIdx(*outputMetaVal))) { - if (!indexOperands.empty()) { - vertexOrPrimitiveIdx = indexOperands.front(); - indexOperands = indexOperands.drop_front(); - } else if (outputTy != storeInst.getValueOperand()->getType()) { - vertexOrPrimitiveIdx = m_builder->getInt32(0); - } - outputTy = outputTy->getArrayElementType(); - outputMetaVal = cast(outputMetaVal->getOperand(1)); - } - - storeOutputMember(outputTy, storeInst.getValueOperand()->getType(), storeValue, indexOperands, 0, outputMetaVal, - nullptr, vertexOrPrimitiveIdx); - - 
m_storeInsts.insert(&storeInst); -} - -// ===================================================================================================================== -// Visits "store" instructions. -void SpirvLowerGlobal::handleStoreInst() { - auto shouldHandle = [&](const unsigned addrSpace) { - const bool isTcsOutput = (m_shaderStage == ShaderStageTessControl && addrSpace == SPIRAS_Output); - const bool isMeshOutput = (m_shaderStage == ShaderStageMesh && addrSpace == SPIRAS_Output); - return isTcsOutput || isMeshOutput; - }; - - for (GlobalVariable &global : m_module->globals()) { - const unsigned addrSpace = global.getType()->getPointerAddressSpace(); - if (!shouldHandle(addrSpace)) - continue; - for (User *user : global.users()) { - if (StoreInst *storeInst = dyn_cast(user)) { - handleStoreInstGEP(&global, {}, *storeInst); - } else if (GetElementPtrInst *gep = dyn_cast(user)) { - // The user is a GEP - // We look for store instructions in the GEP users - for (User *gepUser : gep->users()) { - // We shouldn't have any chained GEPs here, they are coalesced by the LowerAccessChain pass. - assert(!isa(gepUser)); - if (StoreInst *storeInst = dyn_cast(gepUser)) { - SmallVector indexOperands; - for (auto &index : gep->indices()) - indexOperands.push_back(m_builder->CreateZExtOrTrunc(index, m_builder->getInt32Ty())); - handleStoreInstGEP(&global, indexOperands, *storeInst); - } - } - } - } - } -} - // ===================================================================================================================== // Maps the specified global variable to proxy variable. // @@ -618,326 +485,158 @@ void SpirvLowerGlobal::mapGlobalVariableToProxy(GlobalVariable *globalVar) { }); } - m_globalsToErase.push_back(globalVar); -} - -// ===================================================================================================================== -// Maps the specified input to proxy variable. 
-// -// @param input : Input to be mapped -void SpirvLowerGlobal::mapInputToProxy(GlobalVariable *input) { - // NOTE: For tessellation shader, we do not map inputs to real proxy variables. Instead, we directly - // replace "load" instructions with import calls in the lowering operation. - if (m_shaderStage == ShaderStageTessControl || m_shaderStage == ShaderStageTessEval) { - m_inputProxyMap[input] = nullptr; - m_lowerInputInPlace = true; - return; - } - - m_builder->SetInsertPointPastAllocas(m_entryPoint); - - const auto &dataLayout = m_module->getDataLayout(); - Type *inputTy = input->getValueType(); - if (inputTy->isPointerTy()) - inputTy = m_builder->getInt64Ty(); - - MDNode *metaNode = input->getMetadata(gSPIRVMD::InOut); - assert(metaNode); - - auto meta = mdconst::dyn_extract(metaNode->getOperand(0)); - Value *proxy = m_builder->CreateAlloca(inputTy, dataLayout.getAllocaAddrSpace(), nullptr, - Twine(LlpcName::InputProxyPrefix) + input->getName()); - - // Import input to proxy variable - auto inputValue = addCallInstForInOutImport(inputTy, SPIRAS_Input, meta, nullptr, 0, nullptr, nullptr, - InterpLocUnknown, nullptr, false); - - m_builder->CreateStore(inputValue, proxy); - - m_inputProxyMap[input] = proxy; + globalVar->dropAllReferences(); + globalVar->eraseFromParent(); } // ===================================================================================================================== -// Maps the specified output to proxy variable. +// Lowers an input or output global variable. // -// @param output : Output to be mapped -void SpirvLowerGlobal::mapOutputToProxy(GlobalVariable *output) { - m_builder->SetInsertPointPastAllocas(m_entryPoint); - - // NOTE: For tessellation control shader, task shader, or mesh shader, we do not map outputs to real proxy variables. - // Instead, we directly replace "store" instructions with export calls in the lowering operation. 
- if (m_shaderStage == ShaderStageTessControl || m_shaderStage == ShaderStageTask || m_shaderStage == ShaderStageMesh) { - if (output->hasInitializer()) { - auto initializer = output->getInitializer(); - m_builder->CreateStore(initializer, output); - } - m_outputProxyMap.emplace_back(output, nullptr); - m_lowerOutputInPlace = true; - return; - } - - const auto &dataLayout = m_module->getDataLayout(); - Type *outputTy = output->getValueType(); - if (outputTy->isPointerTy()) - outputTy = m_builder->getInt64Ty(); - - auto proxy = m_builder->CreateAlloca(outputTy, dataLayout.getAllocaAddrSpace(), nullptr, - Twine(LlpcName::OutputProxyPrefix) + output->getName()); - - if (output->hasInitializer()) { - auto initializer = output->getInitializer(); - m_builder->CreateStore(initializer, proxy); - } - - m_outputProxyMap.emplace_back(output, proxy); -} - -// ===================================================================================================================== -// Does lowering operations for SPIR-V inputs, replaces inputs with proxy variables. -void SpirvLowerGlobal::lowerInput() { - if (m_inputProxyMap.empty()) { - // Skip lowering if there is no input - return; - } - - // NOTE: For tessellation shader, we invoke handling of "load"/"store" instructions and replace all those - // instructions with import/export calls in-place. - assert(m_shaderStage != ShaderStageTessControl && m_shaderStage != ShaderStageTessEval); - - // NOTE: For fragment shader, we have to handle interpolation functions first since input interpolants must be - // lowered in-place. 
- if (m_shaderStage == ShaderStageFragment) { - // Invoke handling of interpolation calls - handleCallInst(false, true); - - // Remove interpolation calls, they must have been replaced with LLPC intrinsics - std::unordered_set getElemInsts; - for (auto interpCall : m_interpCalls) { - GetElementPtrInst *getElemPtr = dyn_cast(interpCall->getArgOperand(0)); - if (getElemPtr) - getElemInsts.insert(getElemPtr); - - assert(interpCall->use_empty()); - interpCall->dropAllReferences(); - interpCall->eraseFromParent(); - } +// @param globalVar : the global variable to be lowered +void SpirvLowerGlobal::lowerInOut(llvm::GlobalVariable *globalVar) { + assert(globalVar->getAddressSpace() == SPIRAS_Input || globalVar->getAddressSpace() == SPIRAS_Output); + const bool isInput = globalVar->getAddressSpace() == SPIRAS_Input; - for (auto getElemPtr : getElemInsts) { - if (getElemPtr->use_empty()) { - getElemPtr->dropAllReferences(); - getElemPtr->eraseFromParent(); - } - } - } - - for (auto inputMap : m_inputProxyMap) { - auto input = cast(inputMap.first); - auto proxy = inputMap.second; - - for (auto user = input->user_begin(), end = input->user_end(); user != end; ++user) { - // NOTE: "Getelementptr" and "bitcast" will propagate the address space of pointer value (input variable) - // to the element pointer value (destination). We have to clear the address space of this element pointer - // value. The original pointer value has been lowered and therefore the address space is invalid now. 
- Instruction *inst = dyn_cast(*user); - if (inst) { - Type *instTy = inst->getType(); - if (isa(instTy) && instTy->getPointerAddressSpace() == SPIRAS_Input) { - assert(isa(inst) || isa(inst)); - Type *newInstTy = PointerType::get(*m_context, SPIRAS_Private); - inst->mutateType(newInstTy); - } - } - } - - handleVolatileInput(input, proxy); - - input->mutateType(proxy->getType()); // To clear address space for pointer to make replacement valid - input->replaceAllUsesWith(proxy); - input->eraseFromParent(); - } -} - -// ===================================================================================================================== -// Does lowering operations for SPIR-V outputs, replaces outputs with proxy variables. -void SpirvLowerGlobal::lowerOutput() { - if (m_outputProxyMap.empty() && m_shaderStage != ShaderStageGeometry) { - // Skip lowering if there is no output for non-geometry shader - return; + // Apply output initializer, if any + if (!isInput && globalVar->hasInitializer()) { + m_builder->SetInsertPointPastAllocas(m_entryPoint); + auto initializer = globalVar->getInitializer(); + m_builder->CreateStore(initializer, globalVar); } - // Collect "emit" calls - if (m_shaderStage == ShaderStageGeometry) - handleCallInst(true, false); - - // Create unified return block in which to place all the outputs from proxy variables - ReturnInst *retInst = ensureUnifiedReturn(); - - // NOTE: For tessellation control shader, we invoke handling of "load"/"store" instructions and replace all those - // instructions with import/export calls in-place. - assert(m_shaderStage != ShaderStageTessControl); + const bool mapToProxy = isInput ? 
(m_shaderStage != ShaderStageTessControl && m_shaderStage != ShaderStageTessEval) + : (m_shaderStage != ShaderStageTessControl && m_shaderStage != ShaderStageTask && + m_shaderStage != ShaderStageMesh); - // Set the last vertex processing stage - auto shaderStageMask = m_context->getShaderStageMask(); - m_lastVertexProcessingStage = ShaderStageInvalid; - if (shaderStageMask & ShaderStageGeometryBit) - m_lastVertexProcessingStage = ShaderStageGeometry; - else if (shaderStageMask & ShaderStageTessEvalBit) - m_lastVertexProcessingStage = ShaderStageTessEval; - else if (shaderStageMask & ShaderStageVertexBit) - m_lastVertexProcessingStage = ShaderStageVertex; - - buildApiXfbMap(); - - // Export output from the proxy variable prior to "return" instruction or "emit" calls - for (auto outputMap : m_outputProxyMap) { - auto output = cast(outputMap.first); - auto proxy = outputMap.second; - auto proxyTy = proxy->getAllocatedType(); - - MDNode *metaNode = output->getMetadata(gSPIRVMD::InOut); + if (mapToProxy) { + const auto &dataLayout = m_module->getDataLayout(); + Type *ty = globalVar->getValueType(); + if (ty->isPointerTy()) + ty = m_builder->getInt64Ty(); + MDNode *metaNode = globalVar->getMetadata(gSPIRVMD::InOut); assert(metaNode); - auto meta = mdconst::dyn_extract(metaNode->getOperand(0)); - if (m_shaderStage == ShaderStageVertex || m_shaderStage == ShaderStageTessEval || - m_shaderStage == ShaderStageFragment) { - m_builder->SetInsertPoint(retInst); - Value *outputValue = m_builder->CreateLoad(proxyTy, proxy); - addCallInstForOutputExport(outputValue, meta, nullptr, 0, 0, 0, nullptr, nullptr, InvalidValue); - } else if (m_shaderStage == ShaderStageGeometry) { - for (auto emitCall : m_emitCalls) { - unsigned emitStreamId = 0; - - m_builder->SetInsertPoint(emitCall); - - auto mangledName = emitCall->getCalledFunction()->getName(); - if (mangledName.starts_with(gSPIRVName::EmitStreamVertex)) - emitStreamId = cast(emitCall->getOperand(0))->getZExtValue(); - else - 
assert(mangledName.starts_with(gSPIRVName::EmitVertex)); + m_builder->SetInsertPointPastAllocas(m_entryPoint); + Value *proxy = m_builder->CreateAlloca(ty, dataLayout.getAllocaAddrSpace(), nullptr, + Twine(LlpcName::InputProxyPrefix) + globalVar->getName()); - Value *outputValue = m_builder->CreateLoad(proxyTy, proxy); - addCallInstForOutputExport(outputValue, meta, nullptr, 0, 0, 0, nullptr, nullptr, emitStreamId); - } - } - } + if (isInput) { + // Import input to proxy variable + auto inputValue = addCallInstForInOutImport(ty, SPIRAS_Input, meta, nullptr, 0, nullptr, nullptr, + InterpLocUnknown, nullptr, false); - // Replace the Emit(Stream)Vertex calls with builder code. - for (auto emitCall : m_emitCalls) { - unsigned emitStreamId = - emitCall->arg_size() != 0 ? cast(emitCall->getArgOperand(0))->getZExtValue() : 0; - m_builder->SetInsertPoint(emitCall); - m_builder->CreateEmitVertex(emitStreamId); - emitCall->eraseFromParent(); - } + m_builder->CreateStore(inputValue, proxy); - // NOTE: "Getelementptr" will propagate the address space of pointer value (output variable) - // to the element pointer value (destination). We have to clear the address space of this element pointer - // value. The original pointer value has been lowered and therefore the address space is invalid now. 
- for (auto outputMap : m_outputProxyMap) { - auto output = cast(outputMap.first); + handleVolatileInput(globalVar, proxy); + } else { + // Export the output at shader end or vertex emit + if (m_shaderStage == ShaderStageVertex || m_shaderStage == ShaderStageTessEval || + m_shaderStage == ShaderStageFragment) { + m_builder->SetInsertPoint(m_unifiedReturn); + Value *outputValue = m_builder->CreateLoad(ty, proxy); + addCallInstForOutputExport(outputValue, meta, nullptr, 0, 0, 0, nullptr, nullptr, InvalidValue); + } else { + assert(m_shaderStage == ShaderStageGeometry); - SmallVector propagationWorklist; - propagationWorklist.push_back(output); + for (auto emitCall : m_emitCalls) { + unsigned emitStreamId = 0; - while (!propagationWorklist.empty()) { - Value *current = propagationWorklist.pop_back_val(); + m_builder->SetInsertPoint(emitCall); - for (User *user : current->users()) { - Instruction *inst = dyn_cast(user); - if (inst) { - Type *instTy = inst->getType(); - if (isa(instTy) && instTy->getPointerAddressSpace() == SPIRAS_Output) { - assert(isa(inst)); - Type *newInstTy = PointerType::get(*m_context, SPIRAS_Private); - inst->mutateType(newInstTy); + auto mangledName = emitCall->getCalledFunction()->getName(); + if (mangledName.starts_with(gSPIRVName::EmitStreamVertex)) + emitStreamId = cast(emitCall->getOperand(0))->getZExtValue(); + else + assert(mangledName.starts_with(gSPIRVName::EmitVertex)); - propagationWorklist.push_back(user); - } + Value *outputValue = m_builder->CreateLoad(ty, proxy); + addCallInstForOutputExport(outputValue, meta, nullptr, 0, 0, 0, nullptr, nullptr, emitStreamId); } } } - auto proxy = outputMap.second; - output->mutateType(proxy->getType()); // To clear address space for pointer to make replacement valid - output->replaceAllUsesWith(proxy); - output->eraseFromParent(); + SmallVector toErase; + CompilerUtils::replaceAllPointerUses(m_builder, globalVar, proxy, toErase); + for (auto inst : toErase) + inst->eraseFromParent(); + } else { 
+ // In-place lowering. + SmallVector indexStack; + lowerInOutUsersInPlace(globalVar, globalVar, indexStack); } + + assert(globalVar->use_empty()); + globalVar->eraseFromParent(); } // ===================================================================================================================== -// Does inplace lowering operations for SPIR-V inputs/outputs, replaces "load" instructions with import calls and -// "store" instructions with export calls. -void SpirvLowerGlobal::lowerInOutInPlace() { - assert(m_shaderStage == ShaderStageTessControl || m_shaderStage == ShaderStageTessEval || - m_shaderStage == ShaderStageMesh); - - // Invoke handling of "load" and "store" instruction - handleLoadInst(); - if (m_shaderStage == ShaderStageTessControl || m_shaderStage == ShaderStageMesh) - handleStoreInst(); - - DenseSet getElemInsts; - - // Remove unnecessary "load" instructions - for (auto loadInst : m_loadInsts) { - GetElementPtrInst *const getElemPtr = dyn_cast(loadInst->getPointerOperand()); - if (getElemPtr) - getElemInsts.insert(getElemPtr); - - assert(loadInst->use_empty()); - loadInst->dropAllReferences(); - loadInst->eraseFromParent(); - } - - m_loadInsts.clear(); - - // Remove unnecessary "store" instructions - for (auto storeInst : m_storeInsts) { - GetElementPtrInst *const getElemPtr = dyn_cast(storeInst->getPointerOperand()); - if (getElemPtr) - getElemInsts.insert(getElemPtr); - - assert(storeInst->use_empty()); - storeInst->dropAllReferences(); - storeInst->eraseFromParent(); - } - - m_storeInsts.clear(); - - // Remove unnecessary "getelementptr" instructions - while (!getElemInsts.empty()) { - GetElementPtrInst *const getElemPtr = *getElemInsts.begin(); - getElemInsts.erase(getElemPtr); - - // If the GEP still has any uses, skip processing it. - if (!getElemPtr->use_empty()) - continue; - - // If the GEP is GEPing into another GEP, record that GEP as something we need to visit too. 
- if (GetElementPtrInst *const otherGetElemInst = dyn_cast(getElemPtr->getPointerOperand())) - getElemInsts.insert(otherGetElemInst); - - getElemPtr->dropAllReferences(); - getElemPtr->eraseFromParent(); - } +// Recursively lower all users of `current`, which can be traced back to `globalVar` via the given GEP indices, +// to in-place import/export ops. +// +// This makes the assumption that GEPs have not been type-punned (though 0 indices may have been dropped). +void SpirvLowerGlobal::lowerInOutUsersInPlace(llvm::GlobalVariable *globalVar, llvm::Value *current, + SmallVectorImpl &indexStack) { + for (User *user : llvm::make_early_inc_range(current->users())) { + Instruction *inst = cast(user); + + if (auto *gep = dyn_cast(inst)) { + // We currently expect that GEPs are only used on the global variable directly, with the global variable's type. + // The SpirvLowerAccessChain pass ensures this. + // + // TODO: As LLVM is moving away from GEPs towards ptradds, we need a better solution, probably by adding our + // own "structured GEP" operation. + assert(current == globalVar && gep->getSourceElementType() == globalVar->getValueType()); + assert(cast(gep->idx_begin()[0])->isNullValue()); + + for (unsigned i = 1, e = gep->getNumIndices(); i < e; ++i) + indexStack.push_back(m_builder->CreateZExtOrTrunc(gep->idx_begin()[i], m_builder->getInt32Ty())); + + lowerInOutUsersInPlace(globalVar, gep, indexStack); + + indexStack.clear(); + } else if (isa(inst) || isa(inst)) { + auto *loadInst = dyn_cast(inst); + auto *storeInst = dyn_cast(inst); + + m_builder->SetInsertPoint(inst); + + Value *vertexOrPrimitiveIdx = nullptr; + auto inOutTy = globalVar->getValueType(); + auto accessTy = loadInst ? 
loadInst->getType() : storeInst->getValueOperand()->getType(); + auto addrSpace = globalVar->getAddressSpace(); + + MDNode *metaNode = globalVar->getMetadata(gSPIRVMD::InOut); + assert(metaNode); + auto inOutMetaVal = mdconst::dyn_extract(metaNode->getOperand(0)); + + auto indexOperands = ArrayRef(indexStack); + + // If the input/output is arrayed, the outermost index might be used for vertex indexing + if (inOutTy->isArrayTy() && (hasVertexIdx(*inOutMetaVal) || hasPrimitiveIdx(*inOutMetaVal))) { + if (!indexOperands.empty()) { + vertexOrPrimitiveIdx = indexOperands.front(); + indexOperands = indexOperands.drop_front(); + } else if (inOutTy != accessTy) { + vertexOrPrimitiveIdx = m_builder->getInt32(0); + } + inOutTy = inOutTy->getArrayElementType(); + inOutMetaVal = cast(inOutMetaVal->getOperand(1)); + } - // Remove inputs if they are lowered in-place - if (m_lowerInputInPlace) { - for (auto inputMap : m_inputProxyMap) { - auto input = cast(inputMap.first); - assert(input->use_empty()); - input->eraseFromParent(); + if (loadInst) { + Value *loadValue = loadInOutMember(inOutTy, accessTy, addrSpace, indexOperands, 0, inOutMetaVal, nullptr, + vertexOrPrimitiveIdx, InterpLocUnknown, nullptr, false); + loadInst->replaceAllUsesWith(loadValue); + } else { + Value *storeValue = storeInst->getOperand(0); + storeOutputMember(inOutTy, accessTy, storeValue, indexOperands, 0, inOutMetaVal, nullptr, vertexOrPrimitiveIdx); + } + } else { + llvm_unreachable("unhandled user of input/output variable"); } - } - // Remove outputs if they are lowered in-place - if (m_lowerOutputInPlace) { - for (auto outputMap : m_outputProxyMap) { - auto output = cast(outputMap.first); - assert(output->use_empty()); - output->eraseFromParent(); - } + inst->eraseFromParent(); } } @@ -1129,7 +828,6 @@ Value *SpirvLowerGlobal::addCallInstForInOutImport(Type *inOutTy, unsigned addrS vertexIdx, interpLoc, auxInterpValue, isPerVertexDimension); } inOutValue = m_builder->CreateInsertValue(inOutValue, elem, 
{idx}); - // clang-format on } } } @@ -1176,7 +874,7 @@ Value *SpirvLowerGlobal::addCallInstForInOutImport(Type *inOutTy, unsigned addrS if (addrSpace == SPIRAS_Input) { // In the case where the command has no baseVertex parameter, force the value of gl_BaseVertex to zero if (builtIn == lgc::BuiltInBaseVertex && - m_context->getPipelineContext()->getPipelineOptions()->disableBaseVertex) + m_context->getPipelineContext()->getPipelineOptions()->getGlState().disableBaseVertex) inOutValue = m_builder->getInt32(0); else inOutValue = m_builder->CreateReadBuiltInInput(builtIn, inOutInfo, vertexIdx, elemIdx); @@ -2297,6 +1995,7 @@ void SpirvLowerGlobal::lowerUniformConstants() { // ===================================================================================================================== // Interpolates an element of the input. // +// @param returnTy : the return type of the interpolation // @param interpLoc : Interpolation location, valid for fragment shader (use "InterpLocUnknown" as don't-care value) // @param auxInterpValue : Auxiliary value of interpolation (valid for fragment shader): - Sample ID for // "InterpLocSample" - Offset from the center of the pixel for "InterpLocCenter" - Vertex no. 
(0 ~ 2) for @@ -2304,12 +2003,10 @@ void SpirvLowerGlobal::lowerUniformConstants() { // @param callInst : "Call" instruction // @param indexOperands : indices of GEP instruction // @param gv : Global Variable instruction -void SpirvLowerGlobal::interpolateInputElement(unsigned interpLoc, Value *auxInterpValue, CallInst &callInst, - GlobalVariable *gv, ArrayRef indexOperands) { +Value *SpirvLowerGlobal::interpolateInputElement(Type *returnTy, unsigned interpLoc, Value *auxInterpValue, + GlobalVariable *gv, ArrayRef indexOperands) { assert((indexOperands.empty() || cast(indexOperands.front())->isZero()) && "Non-zero GEP first index\n"); - m_builder->SetInsertPoint(&callInst); - auto inputTy = gv->getValueType(); MDNode *metaNode = gv->getMetadata(gSPIRVMD::InOut); @@ -2328,36 +2025,26 @@ void SpirvLowerGlobal::interpolateInputElement(unsigned interpLoc, Value *auxInt if (hasAllConstantIndices(indexOperands)) { if (!indexOperands.empty()) indexOperands = indexOperands.drop_front(); - auto loadValue = loadInOutMember(inputTy, callInst.getFunctionType()->getReturnType(), SPIRAS_Input, indexOperands, - 0, inputMeta, nullptr, nullptr, interpLoc, auxInterpValue, false); - - m_interpCalls.insert(&callInst); - callInst.replaceAllUsesWith(loadValue); - } else { - // Interpolant an element via dynamic index by extending interpolant to each element - // - // Regardless of where we do the interpolation, the alloca for the temporary must be inserted in the function entry - // block for efficient code generation, so we don't use the builder for it. 
- auto interpPtr = new AllocaInst(inputTy, m_module->getDataLayout().getAllocaAddrSpace(), Twine(), - &*(m_entryPoint->begin()->getFirstInsertionPt())); - // Load all possibly accessed values - auto loadValue = loadDynamicIndexedMembers(inputTy, SPIRAS_Input, ArrayRef(indexOperands).drop_front(), inputMeta, - nullptr, interpLoc, auxInterpValue, false); - - m_builder->CreateStore(loadValue, interpPtr); - - auto interpElemPtr = m_builder->CreateGEP(inputTy, interpPtr, indexOperands); - auto interpElemTy = GetElementPtrInst::getIndexedType(inputTy, indexOperands); - - // Only get the value that the original getElemPtr points to - auto interpElemValue = m_builder->CreateLoad(interpElemTy, interpElemPtr); - callInst.replaceAllUsesWith(interpElemValue); - - if (callInst.user_empty()) { - callInst.dropAllReferences(); - callInst.eraseFromParent(); - } + return loadInOutMember(inputTy, returnTy, SPIRAS_Input, indexOperands, 0, inputMeta, nullptr, nullptr, interpLoc, + auxInterpValue, false); } + + // Interpolate an element via dynamic index by extending interpolant to each element + // + // Regardless of where we do the interpolation, the alloca for the temporary must be inserted in the function entry + // block for efficient code generation, so we don't use the builder for it. 
+ auto interpPtr = m_builder->CreateAllocaAtFuncEntry(inputTy); + // Load all possibly accessed values + auto loadValue = loadDynamicIndexedMembers(inputTy, SPIRAS_Input, ArrayRef(indexOperands).drop_front(), inputMeta, + nullptr, interpLoc, auxInterpValue, false); + + m_builder->CreateStore(loadValue, interpPtr); + + auto interpElemPtr = m_builder->CreateGEP(inputTy, interpPtr, indexOperands); + auto interpElemTy = GetElementPtrInst::getIndexedType(inputTy, indexOperands); + + // Only get the value that the original getElemPtr points to + return m_builder->CreateLoad(interpElemTy, interpElemPtr); } // ===================================================================================================================== @@ -2613,21 +2300,23 @@ void SpirvLowerGlobal::changeRtFunctionSignature() { } } + SmallVector globalsToErase; + if (hitAttributeVar && m_entryPoint->arg_size() == 2) { assert(!rayTracingContext->isContinuationsMode() || m_shaderStage != ShaderStageRayTracingIntersect); convertUsersOfConstantsToInstructions(hitAttributeVar); hitAttributeVar->replaceAllUsesWith(m_entryPoint->getArg(1)); - m_globalsToErase.push_back(hitAttributeVar); + globalsToErase.push_back(hitAttributeVar); } if (incomingPayloadVar) { convertUsersOfConstantsToInstructions(incomingPayloadVar); incomingPayloadVar->replaceAllUsesWith(m_entryPoint->getArg(0)); - m_globalsToErase.push_back(incomingPayloadVar); + globalsToErase.push_back(incomingPayloadVar); } else if (incomingCallableDataVar) { convertUsersOfConstantsToInstructions(incomingCallableDataVar); incomingCallableDataVar->replaceAllUsesWith(m_entryPoint->getArg(0)); - m_globalsToErase.push_back(incomingCallableDataVar); + globalsToErase.push_back(incomingCallableDataVar); } if (rayTracingContext->isContinuationsMode()) { @@ -2647,11 +2336,10 @@ void SpirvLowerGlobal::changeRtFunctionSignature() { contFuncTy.writeMetadata(newFunc); } - for (auto globalVar : m_globalsToErase) { + for (auto globalVar : globalsToErase) { 
globalVar->dropAllReferences(); globalVar->eraseFromParent(); } - m_globalsToErase.clear(); } } // namespace Llpc diff --git a/llpc/lower/llpcSpirvLowerGlobal.h b/llpc/lower/llpcSpirvLowerGlobal.h index 1ca6cd6ade..700f9c870b 100644 --- a/llpc/lower/llpcSpirvLowerGlobal.h +++ b/llpc/lower/llpcSpirvLowerGlobal.h @@ -62,14 +62,12 @@ class SpirvLowerGlobal : public SpirvLower, public llvm::PassInfoMixin &indexStack); - llvm::ReturnInst *ensureUnifiedReturn(); + void ensureUnifiedReturn(); - void lowerInput(); - void lowerOutput(); - void lowerInOutInPlace(); void lowerBufferBlock(); void lowerTaskPayload(); void lowerPushConsts(); @@ -105,30 +103,17 @@ class SpirvLowerGlobal : public SpirvLower, public llvm::PassInfoMixin indexOperands, unsigned maxLocOffset, llvm::Constant *outputMeta, llvm::Value *locOffset, llvm::Value *vertexOrPrimitiveIdx); - void interpolateInputElement(unsigned interpLoc, llvm::Value *interpInfo, llvm::CallInst &callInst, - GlobalVariable *gv, ArrayRef indexOperands); + llvm::Value *interpolateInputElement(llvm::Type *returnTy, unsigned interpLoc, llvm::Value *interpInfo, + GlobalVariable *gv, ArrayRef indexOperands); void buildApiXfbMap(); void addCallInstForXfbOutput(const ShaderInOutMetadata &outputMeta, Value *outputValue, unsigned xfbBufferAdjust, unsigned xfbOffsetAdjust, unsigned locOffset, lgc::InOutInfo outputInfo); - llvm::SmallVector m_globalsToErase; - std::unordered_map m_inputProxyMap; // Proxy map for lowering inputs - - // NOTE: Here we use list to store pairs of output proxy mappings. This is because we want output patching to be - // "ordered" (resulting LLVM IR for the patching always be consistent). 
- std::list> m_outputProxyMap; // Proxy list for lowering outputs - - bool m_lowerInputInPlace; // Whether to lower input inplace - bool m_lowerOutputInPlace; // Whether to lower output inplace - - std::unordered_set m_emitCalls; // "Call" instructions to emit vertex (geometry shader) - std::unordered_set m_loadInsts; // "Load" instructions to be removed - std::unordered_set m_storeInsts; // "Store" instructions to be removed - std::unordered_set m_interpCalls; // "Call" instruction to do input interpolation - // (fragment shader) - ShaderStage m_lastVertexProcessingStage; // The last vertex processing stage + llvm::ReturnInst *m_unifiedReturn = nullptr; + std::unordered_set m_emitCalls; // "Call" instructions to emit vertex (geometry shader) + ShaderStage m_lastVertexProcessingStage; // The last vertex processing stage llvm::DenseMap m_builtInXfbMap; // Map built-in to XFB output info specified by API interface llvm::DenseMap diff --git a/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp b/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp index c8c644855c..65a0bcef1e 100644 --- a/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp +++ b/llpc/lower/llpcSpirvLowerInternalLibraryIntrinsicUtil.cpp @@ -175,13 +175,13 @@ static void createAtomic(Function *func, Builder *builder, bool is64, bool isCmp // Create GEP to get the byte address with byte offset gpuAddrAsPtr = builder->CreateGEP(builder->getInt8Ty(), gpuAddrAsPtr, offset); Value *atomicValue = nullptr; + SyncScope::ID scope = func->getContext().getOrInsertSyncScopeID("agent"); if (!isCmpXchg) { assert(binOp != AtomicRMWInst::BAD_BINOP); - atomicValue = builder->CreateAtomicRMW(binOp, gpuAddrAsPtr, value, MaybeAlign(), AtomicOrdering::Monotonic, - SyncScope::System); + atomicValue = builder->CreateAtomicRMW(binOp, gpuAddrAsPtr, value, MaybeAlign(), AtomicOrdering::Monotonic, scope); } else { atomicValue = builder->CreateAtomicCmpXchg(gpuAddrAsPtr, compare, value, MaybeAlign(), 
AtomicOrdering::Monotonic, - AtomicOrdering::Monotonic, SyncScope::System); + AtomicOrdering::Monotonic, scope); atomicValue = builder->CreateExtractValue(atomicValue, 0); } builder->CreateRet(atomicValue); diff --git a/llpc/lower/llpcSpirvLowerTranslator.cpp b/llpc/lower/llpcSpirvLowerTranslator.cpp index 575f1ed0ac..9c61f90cd4 100644 --- a/llpc/lower/llpcSpirvLowerTranslator.cpp +++ b/llpc/lower/llpcSpirvLowerTranslator.cpp @@ -101,7 +101,8 @@ void SpirvLowerTranslator::translateSpirvToLlvm(const PipelineShaderInfo *shader for (const auto &range : descriptorRangeValues) { if (range.type == ResourceMappingNodeType::DescriptorYCbCrSampler) { uint32_t rangeSet = range.set; - if (context->getPipelineContext()->getPipelineOptions()->replaceSetWithResourceType && range.set == 0) { + if (context->getPipelineContext()->getPipelineOptions()->getGlState().replaceSetWithResourceType && + range.set == 0) { rangeSet = PipelineContext::getGlResourceNodeSetFromType(range.type); } convertingSamplers.push_back( diff --git a/llpc/test/lit.cfg.py b/llpc/test/lit.cfg.py index d545648727..896718f119 100644 --- a/llpc/test/lit.cfg.py +++ b/llpc/test/lit.cfg.py @@ -66,6 +66,9 @@ if 'Undefined' in config.xgl_sanitizers: config.available_features.add('ubsan') +if config.llpc_is_standalone != 'ON': + config.available_features.add('gpurt') + llvm_config.use_default_substitutions() config.substitutions.append(('%PATH%', config.environment['PATH'])) diff --git a/llpc/test/lit.site.cfg.py.in b/llpc/test/lit.site.cfg.py.in index c80a3bc6a4..31bd7a41b8 100644 --- a/llpc/test/lit.site.cfg.py.in +++ b/llpc/test/lit.site.cfg.py.in @@ -15,6 +15,7 @@ config.gfxip = "@AMDLLPC_DEFAULT_TARGET@" # Propagate CMake options used in lit feature tests. 
config.llvm_assertions = "@LLVM_ENABLE_ASSERTIONS@" config.xgl_sanitizers = "@XGL_USE_SANITIZER@" +config.llpc_is_standalone = "@LLPC_IS_STANDALONE@" for d in "@LIT_DEFINITIONS@".split(";"): def_split = d.split("=") diff --git a/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm b/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm index f1f8eecaea..7eaa31a17b 100644 --- a/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm +++ b/llpc/test/shaderdb/core/ObjNonUniform_TestTexutreLoadStoreInt64.spvasm @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by tool/update_llpc_test_checks.py ; BEGIN_SHADERTEST -; RUN: amdllpc --print-after=llpc-spirv-lower-translator -o - 2>&1 %s | FileCheck -check-prefixes=SHADERTEST %s +; RUN: amdllpc --print-after=llpc-spirv-lower-translator -filetype=asm -o - 2>&1 %s | FileCheck -check-prefixes=SHADERTEST %s ; #version 450 ; #extension GL_EXT_nonuniform_qualifier : require ; #extension GL_ARB_gpu_shader_int64 : require @@ -88,7 +88,7 @@ OpFunctionEnd ; SHADERTEST-LABEL: @main( ; SHADERTEST-NEXT: .entry: -; SHADERTEST-NEXT: [[TMP0:%.*]] = alloca { [3 x <8 x i32>], { <4 x i32>, i32 } }, align 32, addrspace(5) +; SHADERTEST-NEXT: [[TMP0:%.*]] = alloca { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } }, align 8, addrspace(5) ; SHADERTEST-NEXT: [[_12:%.*]] = alloca i64, align 8, addrspace(5) ; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 7) ; SHADERTEST-NEXT: [[TMP2:%.*]] = call i32 (...) 
@lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 7) @@ -127,20 +127,21 @@ ; SHADERTEST-NEXT: [[TMP31:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 1 ; SHADERTEST-NEXT: [[TMP32:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP31]], 2 ; SHADERTEST-NEXT: [[TMP33:%.*]] = extractvalue { ptr addrspace(4), i32, i32 } [[TMP31]], 0 -; SHADERTEST-NEXT: [[TMP34:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP33]], align 16, !invariant.load !4 -; SHADERTEST-NEXT: [[TMP35:%.*]] = insertvalue { <4 x i32>, i32 } poison, <4 x i32> [[TMP34]], 0 -; SHADERTEST-NEXT: [[TMP36:%.*]] = insertvalue { <4 x i32>, i32 } [[TMP35]], i32 [[TMP32]], 1 -; SHADERTEST-NEXT: [[TMP37:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 0 -; SHADERTEST-NEXT: [[TMP38:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP37]], 0 -; SHADERTEST-NEXT: [[TMP39:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP38]], align 32, !invariant.load !4 -; SHADERTEST-NEXT: [[TMP40:%.*]] = insertvalue [3 x <8 x i32>] poison, <8 x i32> [[TMP39]], 0 -; SHADERTEST-NEXT: [[TMP41:%.*]] = insertvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } poison, [3 x <8 x i32>] [[TMP40]], 0 -; SHADERTEST-NEXT: [[TMP42:%.*]] = insertvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP41]], { <4 x i32>, i32 } [[TMP36]], 1 -; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[a3v8i32,s[v4i32,i32]]"({ [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP42]]) -; SHADERTEST-NEXT: store { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP42]], ptr addrspace(5) [[TMP0]], align 32 -; SHADERTEST-NEXT: [[TMP43:%.*]] = load { [3 x <8 x i32>], { <4 x i32>, i32 } }, ptr addrspace(5) [[TMP0]], align 32 -; SHADERTEST-NEXT: [[TMP44:%.*]] = extractvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP43]], 1 -; SHADERTEST-NEXT: [[TMP45:%.*]] = extractvalue { [3 x <8 x i32>], { <4 x i32>, i32 } } [[TMP43]], 0 -; SHADERTEST-NEXT: [[TMP46:%.*]] = 
extractvalue [3 x <8 x i32>] [[TMP45]], 0 -; SHADERTEST-NEXT: [[TMP47:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP44]], 0 -; SHADERTEST-NEXT: [[TMP48:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> [[TMP46]], <4 x i32> [[TMP47]], i32 1, <2 x float> zeroinitializer) +; SHADERTEST-NEXT: [[TMP34:%.*]] = insertvalue { ptr addrspace(4), i32 } poison, ptr addrspace(4) [[TMP33]], 0 +; SHADERTEST-NEXT: [[TMP35:%.*]] = insertvalue { ptr addrspace(4), i32 } [[TMP34]], i32 [[TMP32]], 1 +; SHADERTEST-NEXT: [[TMP36:%.*]] = extractvalue { { ptr addrspace(4), i32, i32, i32 }, { ptr addrspace(4), i32, i32 } } [[TMP30]], 0 +; SHADERTEST-NEXT: [[TMP37:%.*]] = extractvalue { ptr addrspace(4), i32, i32, i32 } [[TMP36]], 0 +; SHADERTEST-NEXT: [[TMP38:%.*]] = insertvalue [3 x ptr addrspace(4)] poison, ptr addrspace(4) [[TMP37]], 0 +; SHADERTEST-NEXT: [[TMP39:%.*]] = insertvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } poison, [3 x ptr addrspace(4)] [[TMP38]], 0 +; SHADERTEST-NEXT: [[TMP40:%.*]] = insertvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP39]], { ptr addrspace(4), i32 } [[TMP35]], 1 +; SHADERTEST-NEXT: call void @"spirv.NonUniform.s[a3p4,s[p4,i32]]"({ [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP40]]) +; SHADERTEST-NEXT: store { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP40]], ptr addrspace(5) [[TMP0]], align 8 +; SHADERTEST-NEXT: [[TMP41:%.*]] = load { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } }, ptr addrspace(5) [[TMP0]], align 8 +; SHADERTEST-NEXT: [[TMP42:%.*]] = extractvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP41]], 1 +; SHADERTEST-NEXT: [[TMP43:%.*]] = extractvalue { [3 x ptr addrspace(4)], { ptr addrspace(4), i32 } } [[TMP41]], 0 +; SHADERTEST-NEXT: [[TMP44:%.*]] = extractvalue [3 x ptr addrspace(4)] [[TMP43]], 0 +; SHADERTEST-NEXT: [[TMP45:%.*]] = extractvalue { ptr addrspace(4), i32 } [[TMP42]], 0 +; 
SHADERTEST-NEXT: [[TMP46:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) [[TMP44]], ptr addrspace(4) [[TMP45]], i32 1, <2 x float> zeroinitializer) +; SHADERTEST-NEXT: store <4 x float> [[TMP46]], ptr addrspace(65) @_3, align 16 +; SHADERTEST-NEXT: ret void +; diff --git a/llpc/test/shaderdb/core/OpAccessChain_TestBlockVectorExtract_lit.frag b/llpc/test/shaderdb/core/OpAccessChain_TestBlockVectorExtract_lit.frag index 0e28ffad56..f7005d4991 100644 --- a/llpc/test/shaderdb/core/OpAccessChain_TestBlockVectorExtract_lit.frag +++ b/llpc/test/shaderdb/core/OpAccessChain_TestBlockVectorExtract_lit.frag @@ -40,11 +40,11 @@ void main() ; SHADERTEST: %[[COLUMN1:.*]] = type <{ [3 x float], [4 x i8] }> ; SHADERTEST: %[[COLUMN2:.*]] = type <{ [4 x double] }> -; SHADERTEST: getelementptr inbounds (<{ [3 x float], [4 x i8], [2 x %[[COLUMN1]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 0, i32 1 +; SHADERTEST: getelementptr {{(inbounds )?}}(<{ [3 x float], [4 x i8], [2 x %[[COLUMN1]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 0, i32 1 ; SHADERTEST: getelementptr <{ [3 x float], [4 x i8], [2 x %[[COLUMN1]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 2, i32 1, i32 0, i32 %{{[0-9]*}} ; SHADERTEST: getelementptr <{ [3 x float], [4 x i8], [2 x %[[COLUMN1]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 2, i32 %{{[0-9]*}}, i32 0, i32 1 ; SHADERTEST: getelementptr <{ [4 x double], [4 x %[[COLUMN2]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 0, i32 %{{[0-9]*}} -; SHADERTEST: getelementptr inbounds (<{ [4 x double], [4 x %[[COLUMN2]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 1, i32 2, i32 0, i32 3 +; SHADERTEST: getelementptr {{(inbounds )?}}(<{ [4 x double], [4 x %[[COLUMN2]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 1, i32 2, i32 0, i32 3 ; SHADERTEST: getelementptr <{ [4 x double], [4 x %[[COLUMN2]]] }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 1, i32 %{{[0-9]*}}, i32 0, i32 
%{{[0-9]*}} ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp index 1e2d98cc1c..2d46dfd421 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageDimension_lit.comp @@ -124,94 +124,94 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 6, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 8, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 9, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 4, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 5, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 8, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 6, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 7, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9, i32 3) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 2, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9, i32 3) -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.compare.swap.i32(i32 9, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, <4 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 4, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 5, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 8, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 6, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 7, i32 0, i32 0, <8 x i32> +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 4, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 8, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 9, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 10, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 10, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 0, i32 4, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 5, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 8, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 6, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 7, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <4 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 7, i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 2, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <3 x i32> , i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 9, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9, i32 3) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 10, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 4, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 5, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 8, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.compare.swap.i32(i32 6, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 7, i32 0, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 9, i16 7, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp index 8b2876bf51..3892925122 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImageMemoryQualifier_lit.comp @@ -16,9 +16,9 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 9, i16 5, i16 5, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp index 9241040a7e..c64f1c2cdb 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.comp @@ -46,23 +46,23 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 5, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.atomic.f32(i32 0, i32 1, i32 0, i32 0, <8 x i32> +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> , i32 9) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 5, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.atomic.f32(i32 0, i32 1, i32 0, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 9, i16 7, i16 7, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag index 2ee55dcbf6..81b0a25cc1 100644 --- a/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag +++ b/llpc/test/shaderdb/core/OpAtomicXXX_TestImage_lit.frag @@ -48,23 +48,23 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 128, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 5, i32 0, i32 0, i32 0, <4 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 0, i32 128, i32 0, <4 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 0, i32 0, <8 x i32> -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 0, i32 0, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.atomic.f32(i32 0, i32 9, i32 0, i32 0, <8 x i32> +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 0, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 4, i32 1, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 6, i32 1, i32 128, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 6, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 0, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 0, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 0, i32 0, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 0, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 3, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 5, i32 10, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 7, i32 10, i32 128, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 8, i32 7, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 9, i32 3, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 10, i32 3, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 0, i32 3, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.compare.swap.i32(i32 3, i32 0, i32 0, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) 
@lgc.create.image.atomic.f32(i32 0, i32 9, i32 0, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %{{.*}}, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm b/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm index ff8682c708..fd43a195b7 100644 --- a/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm +++ b/llpc/test/shaderdb/core/OpFMul_TestOperandIsZero.spvasm @@ -32,6 +32,8 @@ OpDecorate %22 FPFastMathMode NotInf OpDecorate %32 FPFastMathMode NotInf OpDecorate %38 FPFastMathMode NotInf + OpDecorate %33 NoContraction + OpDecorate %39 NoContraction %void = OpTypeVoid %3 = OpTypeFunction %void %float = OpTypeFloat 32 diff --git a/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag b/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag index 110b133f0d..b7e84aefe6 100644 --- a/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag +++ b/llpc/test/shaderdb/core/OpFOrdEqual_TestVec3_lit.frag @@ -20,7 +20,7 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP0]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{(inbounds i8|<{ [[]3 x float], [[]4 x i8], [[]3 x float] }>)|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP4:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP3]], align 16 // CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[TMP2]], i64 0 // CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x float> [[TMP4]], i64 0 diff --git 
a/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag b/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag index 05b9e49a03..0732bda8cb 100644 --- a/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag +++ b/llpc/test/shaderdb/core/OpFOrdNotEqual_TestVec3_lit.frag @@ -20,7 +20,7 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP0]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ [[]3 x float], [[]4 x i8], [[]3 x float] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP4:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP3]], align 16 // CHECK-NEXT: [[TMP5:%.*]] = extractelement <3 x float> [[TMP2]], i64 0 // CHECK-NEXT: [[TMP6:%.*]] = extractelement <3 x float> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag b/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag index 01adb95c6a..1ce6abbbbc 100644 --- a/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag +++ b/llpc/test/shaderdb/core/OpIEqual_TestIvec2_lit.frag @@ -20,7 +20,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ [[]2 x i32], [[]2 x i32] }>|i8}}, ptr addrspace(7) 
[[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag b/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag index d58cc30b47..3fedb0e70d 100644 --- a/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag +++ b/llpc/test/shaderdb/core/OpINotEqual_TestIvec2_lit.frag @@ -20,7 +20,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ [[]2 x i32], [[]2 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestBasic_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestBasic_lit.frag index 151dde1a3a..380040dcfa 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestBasic_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestBasic_lit.frag @@ -17,9 +17,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: [[IMAGEPTR:%[0-9A-Za-z_.-]+]] = call {{.*}} @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: [[SAMPLERPTR:%[0-9A-Za-z_.-]+]] = call {{.*}} 
@lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: [[SAMPLER:%[0-9A-Za-z_.-]+]] = load <4 x i32>, {{<4 x i32> addrspace\(4\)\*|ptr addrspace\(4\)}} [[SAMPLERPTR]] -; SHADERTEST: [[IMAGE:%[0-9A-Za-z_.-]+]] = load <8 x i32>, {{<8 x i32> addrspace\(4\)\*|ptr addrspace\(4\)}} [[IMAGEPTR]] -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> {{.*}}@lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x i32> [[IMAGE]], <4 x i32> [[SAMPLER]],{{.*}},{{.*}} float 2.000000e+00 +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> {{.*}}@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) [[IMAGEPTR]], ptr addrspace(4) [[SAMPLERPTR]],{{.*}},{{.*}} float 2.000000e+00 ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float 2.000000e+00, diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag index b6e0067e9d..42c07e1bba 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGatherOffset_lit.frag @@ -27,9 +27,9 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 257, float 0x3FECCCCCC0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag index 77074845cf..d5f32f94b5 100644 --- a/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag +++ b/llpc/test/shaderdb/core/OpImageDrefGather_TestTextureGather_lit.frag @@ -1,3 +1,4 @@ + #version 450 layout(set = 0, binding = 0) uniform sampler2DShadow samp2DShadow; @@ -33,7 +34,7 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, {{.*}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FECCCCCC0000000) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, {{.*}}, i32 545, <3 x float> , float 0.000000e+00, float 0x3FE99999A0000000) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FE6666660000000) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 545, <2 x float> , float 0.000000e+00, float 0x3FE6666660000000) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float 0x3FECCCCCC0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageExplicitLod_TestDrefLodOffset_lit.frag b/llpc/test/shaderdb/core/OpImageExplicitLod_TestDrefLodOffset_lit.frag index 2ccb975b3a..5e243f4bcd 100644 --- a/llpc/test/shaderdb/core/OpImageExplicitLod_TestDrefLodOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageExplicitLod_TestDrefLodOffset_lit.frag @@ -17,7 +17,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.sample.f32(i32 1, i32 512, <8 x i32>{{.*}}, i32 801,{{.*}}, float 1.000000e+00, <2 x i32> , +; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) 
@lgc.create.image.sample.f32(i32 1, i32 512, ptr addrspace(4){{.*}}, i32 801,{{.*}}, float 1.000000e+00, <2 x i32> , ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} float @llvm.amdgcn.image.sample.c.l.o.2d.f32.f32(i32 1, i32 770,{{.*}},{{.*}},{{.*}}, float 1.000000e+00,{{.*}},{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestBuffer_lit.comp b/llpc/test/shaderdb/core/OpImageFetch_TestBuffer_lit.comp index aa8f65b773..c97da7dea4 100644 --- a/llpc/test/shaderdb/core/OpImageFetch_TestBuffer_lit.comp +++ b/llpc/test/shaderdb/core/OpImageFetch_TestBuffer_lit.comp @@ -19,7 +19,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 4, i32 4, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 3) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 10, i32 1536, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 3) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32({{.*}}, i32 3, i32 0, i32 0, i32 0), !invariant.load diff --git a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag index 78d7897576..4dbca71ce8 100644 --- a/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag +++ b/llpc/test/shaderdb/core/OpImageFetch_TestTexelFetch_lit.frag @@ -36,7 +36,7 @@ void main() ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 2, i32 2) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 1, i32 128, {{.*}}, <2 x i32> , i32 8) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 9, i32 1536, {{.*}}, <2 x i32> ) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 1536, {{.*}}, i32 5) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 10, i32 1536, {{.*}}, i32 5) ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.with.fmask.v4f32(i32 6, i32 128, {{.*}}, {{.*}}, <2 x i32> , i32 4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageGather_TestConstOffsets_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestConstOffsets_lit.frag index ac7612a0e2..428160ad38 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestConstOffsets_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestConstOffsets_lit.frag @@ -18,7 +18,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x {{.*}}, i32 2, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x {{.*}}, i32 2, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 4, i32 513,{{.*}},{{.*}},{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestDrefConstOffsets_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestDrefConstOffsets_lit.frag index 09f96ac2b3..6d1959e231 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestDrefConstOffsets_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestDrefConstOffsets_lit.frag @@ -18,7 +18,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 801, <2 x {{.*}}, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 1.000000e+00) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 801, <2 x {{.*}}, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ], float 1.000000e+00) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 513, float 1.000000e+00,{{.*}},{{.*}},{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag b/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag index 97be87bb57..2eb83e3bf0 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestIntegerSampler.frag @@ -25,12 +25,12 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) -; SHADERTEST: call <4 x i32> (...) 
@lgc.create.image.gather.v4i32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 516, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.gather.v4i32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) +; SHADERTEST: call <4 x i32> (...) 
@lgc.create.image.gather.v4i32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, [4 x <2 x i32>] [<2 x i32> , <2 x i32> , <2 x i32> , <2 x i32> ]) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 1, half 0xH0000, half 0xH3C00, <8 x i32> %{{.*}}, <4 x i32> %{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestOffset_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestOffset_lit.frag index 52fe89de91..6509cb2964 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestOffset_lit.frag @@ -18,7 +18,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x {{.*}}, i32 2, float 0.000000e+00, <2 x {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x {{.*}}, i32 2, float 0.000000e+00, <2 x {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 4,{{.*}},{{.*}},{{.*}},{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag index 7c7805cc3f..9ed1e5cc97 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherBiasLod_lit.frag @@ -61,22 +61,22 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 2, i32 2, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <2 x {{.*}}, i32 0, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <3 x {{.*}}, i32 1, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 3, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <3 x {{.*}}, i32 2, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 8, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 69, <4 x {{.*}}, i32 3, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 325, <2 x {{.*}}, i32 0, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 325, <3 x {{.*}}, i32 1, {{.*}}, <2 x i32> ) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 325, <2 x {{.*}}, i32 0, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 325, <3 x {{.*}}, i32 1, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x {{.*}}, i32 0, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <3 x {{.*}}, i32 1, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 3, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <3 x {{.*}}, i32 2, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 8, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <4 x {{.*}}, i32 3, {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x {{.*}}, i32 0, {{.*}}, <2 x i32> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <3 x {{.*}}, i32 1, {{.*}}, <2 x i32> ) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x {{.*}}, i32 0, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <3 x {{.*}}, i32 1, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 69, <2 x {{.*}}, i32 0, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 69, <3 x {{.*}}, i32 1, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 3, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 69, <3 x {{.*}}, i32 2, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 8, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 69, <4 x {{.*}}, i32 3, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 325, <2 x {{.*}}, i32 0, {{.*}}, <2 x i32> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 325, <3 x {{.*}}, i32 1, {{.*}}, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 325, <2 x {{.*}}, i32 0, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 325, <3 x {{.*}}, i32 1, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x {{.*}}, i32 0, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <3 x {{.*}}, i32 1, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 3, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <3 x {{.*}}, i32 2, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 8, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <4 x {{.*}}, i32 3, {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x {{.*}}, i32 0, {{.*}}, <2 x i32> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <3 x {{.*}}, i32 1, {{.*}}, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x {{.*}}, i32 0, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <3 x {{.*}}, i32 1, {{.*}}, [4 x <2 x i32>] [<2 x i32> zeroinitializer, <2 x i32> , <2 x i32> , <2 x i32> ]) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1,{{.*}},{{.*}},{{.*}},{{.*}},{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag index 584be47256..d05255542d 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGatherOffset_lit.frag @@ -31,9 +31,9 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 2, float 0.000000e+00, <2 x {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, <8 x {{.*}}, <4 x {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, <2 x {{.*}}) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 2, float 0.000000e+00, <2 x {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <3 x float> , i32 3, float 0.000000e+00, <2 x {{.*}}) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 293, <2 x float> , i32 0, float 0.000000e+00, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 4,{{.*}}, float 0x3FB99999A0000000, float 0x3FB99999A0000000,{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag b/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag index 5134dadc01..2bdc97e82b 100644 --- a/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag +++ b/llpc/test/shaderdb/core/OpImageGather_TestTextureGather_lit.frag @@ -30,9 +30,9 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 2, i32 2, i64 0, i32 0) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 2, float 0.000000e+00) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.gather.v4f32(i32 5, i32 384, <8 x {{.*}}, <4 x {{.*}}, i32 37, <3 x float> , i32 3, float 0.000000e+00) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, <8 x {{.*}}, <4 x {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 1, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x float> , i32 2, float 0.000000e+00) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 5, i32 384, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <3 x float> , i32 3, float 0.000000e+00) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.gather.v4f32(i32 9, i32 512, ptr addrspace(4) {{.*}}, ptr addrspace(4) {{.*}}, i32 37, <2 x float> , i32 0, float 0.000000e+00) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 4, float 0x3FB99999A0000000, float 0x3FB99999A0000000, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag b/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag index 403ee311ff..9ca1f9f322 100644 --- a/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQueryLevels_TestTextureQueryLevels_lit.frag @@ -30,10 +30,10 @@ void main() ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 1, i32 0) ; SHADERTEST: ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4{{.*}}(i32 1, i32 1, i64 0, i32 0) -; SHADERTEST: call i32 (...) 
@lgc.create.image.query.levels.i32(i32 0, i32 512, <8 x {{.*}}) -; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, <8 x {{.*}}) -; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 512, <8 x {{.*}}) -; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 8, i32 128, <8 x {{.*}}) +; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 0, i32 512, ptr addrspace(4) {{.*}}) +; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 128, ptr addrspace(4) {{.*}}) +; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 1, i32 512, ptr addrspace(4) {{.*}}) +; SHADERTEST: call i32 (...) @lgc.create.image.query.levels.i32(i32 8, i32 128, ptr addrspace(4) {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag index 483ef81f1d..45ed00b62b 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestBasic_lit.frag @@ -106,7 +106,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 4, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 5, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 512, {{.*}}, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag index 
9a00841e7e..f9b18ff291 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestImageSize_lit.frag @@ -36,7 +36,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 128, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 128, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 128, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp b/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp index fd99503fae..aaa04cdc47 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp +++ b/llpc/test/shaderdb/core/OpImageQuerySize_TestImage_lit.comp @@ -60,7 +60,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 8, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 512, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 512, {{.*}}, i32 0) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag b/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag index 6c0b3a9485..dbc8973fd0 100644 --- a/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag +++ 
b/llpc/test/shaderdb/core/OpImageQuerySize_TestTextureSize_lit.frag @@ -29,7 +29,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 9, i32 512, {{.*}}, i32 0) -; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 0, i32 128, {{.*}}, i32 0) +; SHADERTEST: call {{.*}} @lgc.create.image.query.size.i32(i32 10, i32 128, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v2i32(i32 6, i32 512, {{.*}}, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.query.size.v3i32(i32 7, i32 128, {{.*}}, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageRead_TestBuffer_lit.comp b/llpc/test/shaderdb/core/OpImageRead_TestBuffer_lit.comp index 8c2bdc89ad..048f86a7ab 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestBuffer_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_TestBuffer_lit.comp @@ -19,7 +19,7 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 4, i32 4, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 512, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 3) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 10, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 3) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32({{.*}}, i32 3, i32 0, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag b/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag index 3f38b36347..0674a80145 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag +++ b/llpc/test/shaderdb/core/OpImageRead_TestImageLoad_lit.frag @@ -35,7 +35,7 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 0, i32 512, {{.*}}, i32 1) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 9, i32 512, {{.*}}, <2 x i32> ) -; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 0, i32 128, {{.*}}, i32 4) +; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 10, i32 128, {{.*}}, i32 4) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 8, i32 128, {{.*}}, <4 x i32> ) ; SHADERTEST: call {{.*}} @lgc.create.image.load.v4f32(i32 6, i32 512, {{.*}}, <3 x i32> ) diff --git a/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp b/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp index 226e84d136..edba3359b8 100644 --- a/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp +++ b/llpc/test/shaderdb/core/OpImageRead_TestMemoryQualifier_lit.comp @@ -27,10 +27,10 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 2 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1 ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0 -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 513, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 515, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 513, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 1, i32 515, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 1, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag index 9257edf333..b7666b14b1 100644 --- a/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleExplicitLod_TestTextureGradClamp_lit.frag @@ -53,12 +53,12 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 2, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <3 x float> , <3 x float> , {{.*}}, <3 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32({{.*}}, float 1.000000e+00, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF19999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32({{.*}}, float 1.000000e+00, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF19999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.3d.v4f32.f32.f32({{.*}}, float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF3333340000000, float 
0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FC99999A0000000, {{.*}}) ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32({{.*}}, i32 514, float 1.000000e+00, float 1.000000e+00, float 0x3FF19999A0000000, float 0x3FF19999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.3d.v4f32.f32.f32({{.*}}, i32 197379, float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF3333340000000, float 0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000, float 0x3FF4CCCCC0000000, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FC99999A0000000, {{.*}}) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag index 64da83e7c7..e42cb57358 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestArrayDirectAccess_lit.frag @@ -14,7 +14,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestBasic_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestBasic_lit.frag index 69a0938216..79b46e8c01 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestBasic_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestBasic_lit.frag @@ -13,7 +13,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag index 5bddde6b7a..780f8fc1eb 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestIntegerSampler_lit.frag @@ -16,8 +16,8 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call <4 x i32> (...) 
@lgc.create.image.sample.v4i32(i32 1, i32 516, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.sample.v4i32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.sample.v4i32(i32 1, i32 516, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.sample.v4i32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag index d772429a42..af4dec00a8 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestMultiDimArrayDirectAccess_lit.frag @@ -14,7 +14,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 0, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 0, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> ) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestSeparate_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestSeparate_lit.frag index eb6fe23a80..60923e7b20 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestSeparate_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestSeparate_lit.frag @@ -14,7 +14,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 1) diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag index e141adff10..a132ad6d38 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureBiasClamp_lit.frag @@ -74,31 +74,31 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 193, <4 x float> %{{[0-9]*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; 
SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.3d.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.3d.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubesc(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubetc(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubema(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubeid(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.cube.v4f32.f32.f32({{.*}}, float 2.000000e+00, {{.*}}) -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.1darray.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.1darray.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.2darray.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> 
@llvm.amdgcn.image.sample.b.cl.2darray.v4f32.f32.f32({{.*}}, float 2.000000e+00, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.b.cl.cube.v4f32.f32.f32({{.*}}, float 2.000000e+00, {{.*}}) ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag index 002bcc9bf2..e48d5ec2f5 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureClamp_lit.frag @@ -74,30 +74,30 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 129, <4 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32({{.*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32({{.*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.3d.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float 
%{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.3d.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubesc(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubetc(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubema(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubeid(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.cube.v4f32.f32 -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.1darray.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.1darray.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.2darray.v4f32.f32({{.*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} <4 x float> 
@llvm.amdgcn.image.sample.cl.cube.v4f32.f32 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag index add49732f6..6a83db23d5 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradClamp_lit.frag @@ -76,31 +76,31 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 8, i32 512, {{.*}}, {{.*}}, i32 153, <4 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, <3 x float> %{{[0-9]*}}, {{.*}}) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> 
@llvm.amdgcn.image.sample.d.cl.3d.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.3d.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubesc(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubetc(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubema(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} float @llvm.amdgcn.cubeid(float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}) ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.cube.v4f32.f32.f32 -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.1darray.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.1darray.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.2darray.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float 
%{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.2darray.v4f32.f32.f32({{.*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[0-9]*}}, float %{{[.i0-9]*}}, float %{{[.i0-9]*}}, float %{{[0-9]*}}, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.cube.v4f32.f32.f32 ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag index 149360a1f3..b13c902967 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureGradOffsetClamp_lit.frag @@ -61,21 +61,21 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 409, <3 x float> , <2 x float> , <2 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{.*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{.*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32({{.*}}, i32 2, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{.*}} +; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32({{.*}}, i32 2, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{.*}} 
-; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32({{.*}}, i32 514, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{.*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32({{.*}}, i32 514, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{.*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.3d.v4f32.f32.f32({{.*}}, i32 131586, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{.*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.3d.v4f32.f32.f32({{.*}}, i32 131586, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{.*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1darray.v4f32.f32.f32({{.*}}, i32 2, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0.000000e+00, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{.*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1darray.v4f32.f32.f32({{.*}}, i32 2, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0.000000e+00, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) 
%{{.*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{.*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2darray.v4f32.f32.f32({{.*}}, i32 514, float 0x3FC99999A0000000, float 0x3FC99999A0000000, float 0x3FD3333340000000, float 0x3FD3333340000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0.000000e+00, {{.*}}) ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag index 761e8f8e83..5e7dd16029 100644 --- a/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag +++ b/llpc/test/shaderdb/core/OpImageSampleImplicitLod_TestTextureOffsetClamp_lit.frag @@ -61,21 +61,21 @@ void main() ; SHADERTEST: call {{.*}} @lgc.create.image.sample.v4f32(i32 5, i32 512, {{.*}}, {{.*}}, i32 385, <3 x float> , {{.*}}, <2 x i32> ) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane -; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32({{.*}}, i32 2, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call i32 @llvm.amdgcn.readfirstlane +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32({{.*}}, i32 2, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32({{.*}}, i32 514, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32({{.*}}, i32 514, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load 
<8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.3d.v4f32.f32({{.*}}, i32 131586, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.3d.v4f32.f32({{.*}}, i32 131586, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} -; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.1darray.v4f32.f32({{.*}}, i32 2, float 0x3FB99999A0000000, float 0.000000e+00, {{.*}}) ; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.1darray.v4f32.f32({{.*}}, i32 2, float 0x3FB99999A0000000, float 0.000000e+00, {{.*}}) ; SHADERTEST: load <8 x i32>, ptr addrspace(4) %{{[0-9]*}} +; SHADERTEST: load <4 x i32>, ptr addrspace(4) %{{[0-9]*}} ; SHADERTEST: call {{.*}} <4 x float> @llvm.amdgcn.image.sample.cl.o.2darray.v4f32.f32({{.*}}, i32 514, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0.000000e+00, {{.*}}) ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag b/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag index 8868c4c22e..8f5321288a 100644 --- a/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag +++ b/llpc/test/shaderdb/core/OpLogicalNotEqual_TestGeneral_lit.frag @@ -30,10 +30,10 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 8 +// SHADERTEST-NEXT: 
[[TMP3:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(7) [[TMP0]], i32 8 // SHADERTEST-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[TMP3]], align 4 // SHADERTEST-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP2]], [[TMP4]] -// SHADERTEST-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 12 +// SHADERTEST-NEXT: [[TMP6:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(7) [[TMP0]], i32 12 // SHADERTEST-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(7) [[TMP6]], align 4 // SHADERTEST-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP2]], [[TMP7]] // SHADERTEST-NEXT: [[TMP9:%.*]] = and i1 [[TMP8]], [[TMP5]] diff --git a/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag b/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag index 7956d7e89c..21dfa067ef 100644 --- a/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag +++ b/llpc/test/shaderdb/core/OpSLessThanEqual_TestSignedAndUnsigned_lit.frag @@ -25,7 +25,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32], [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ [[]2 x i32], [[]2 x i32], [[]2 x i32], [[]2 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag 
b/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag index 2bb60ce1df..357603eba2 100644 --- a/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag +++ b/llpc/test/shaderdb/core/OpSLessThan_TestSignedAndUnsigned_lit.frag @@ -25,7 +25,7 @@ void main() // SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // SHADERTEST-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP0]], align 8 -// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ [[]2 x i32], [[]2 x i32], [[]2 x i32], [[]2 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} +// SHADERTEST-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ [[]2 x i32], [[]2 x i32], [[]2 x i32], [[]2 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // SHADERTEST-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr addrspace(7) [[TMP3]], align 8 // SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 0 // SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i64 0 diff --git a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert index 25b03a1630..c0a28c5490 100644 --- a/llpc/test/shaderdb/core/TestXfbStateMetadata.vert +++ b/llpc/test/shaderdb/core/TestXfbStateMetadata.vert @@ -29,8 +29,8 @@ void main() // //. // CHECK: attributes #[[ATTR0]] = { nounwind "denormal-fp-math-f32"="preserve-sign" } -// CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) } -// CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind } +// CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind } +// CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn memory(read) } //. 
// CHECK: [[META1]] = !{i32 1} // CHECK: [[META6]] = !{i32 0} diff --git a/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe b/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe index 660dfcb827..54bf3513dc 100644 --- a/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe +++ b/llpc/test/shaderdb/extensions/ExtMultiView_TestSubpassLoad_lit.pipe @@ -3,7 +3,7 @@ ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 608, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 608, ptr addrspace(4) ; SHADERTEST: AMDLLPC SUCCESS ; END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag index 07064eab48..ad76b1641c 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaDouble_lit.frag @@ -23,16 +23,16 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(7) [[TMP0]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // 
CHECK-NEXT: [[TMP4:%.*]] = load double, ptr addrspace(7) [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr {{inbounds i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load double, ptr addrspace(7) [[TMP5]], align 8 // CHECK-NEXT: [[TMP7:%.*]] = call reassoc nnan nsz arcp contract double (...) @lgc.create.fma.f64(double [[TMP2]], double [[TMP4]], double [[TMP6]]) -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 4}} +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr {{inbounds i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP8]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 6}} +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr {{inbounds i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP10]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>}}, ptr 
addrspace(7) [[TMP0]], i32 {{96|0, i32 8}} +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{inbounds i8|<{ double, double, double, [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x double] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{96|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP12]], align 32 // CHECK-NEXT: [[TMP14:%.*]] = call reassoc nnan nsz arcp contract <3 x double> (...) @lgc.create.fma.v3f64(<3 x double> [[TMP9]], <3 x double> [[TMP11]], <3 x double> [[TMP13]]) // CHECK-NEXT: [[D3_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x double> [[TMP14]], i64 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag index 015a965534..653bdff986 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestFmaFloat_lit.frag @@ -23,16 +23,16 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} +// CHECK-NEXT: [[TMP5:%.*]] 
= getelementptr {{inbounds i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load float, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[TMP7:%.*]] = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.fma.f32(float [[TMP2]], float [[TMP4]], float [[TMP6]]) -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr {{inbounds i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr {{inbounds i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{inbounds i8|<{ float, float, float, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: 
[[TMP14:%.*]] = call reassoc nnan nsz arcp contract afn <3 x float> (...) @lgc.create.fma.v3f32(<3 x float> [[TMP9]], <3 x float> [[TMP11]], <3 x float> [[TMP13]]) // CHECK-NEXT: [[F3_0_0_VEC_EXTRACT:%.*]] = extractelement <3 x float> [[TMP14]], i64 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag index 4cc9302d98..3674cbd309 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectDouble_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load double, ptr addrspace(7) [[TMP0]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load double, ptr addrspace(7) [[TMP3]], align 8 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr {{inbounds i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: 
[[TMP7:%.*]] = select reassoc nnan nsz arcp contract i1 [[DOTNOT]], double [[TMP2]], double [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 4}} +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr {{inbounds i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP8]], align 32 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 6}} +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr {{inbounds i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{64|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x double>, ptr addrspace(7) [[TMP10]], align 32 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{96|0, i32 8}} +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{inbounds i8|<{ double, double, i32, [[]12 x i8], [[]3 x double], [[]8 x i8], [[]3 x double], [[]8 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{96|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag index 6889ff741e..f003075a6d 100644 --- 
a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectFloat_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load float, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr {{inbounds i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select reassoc nnan nsz arcp contract afn i1 [[DOTNOT]], float [[TMP2]], float [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr {{inbounds i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x 
i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr {{inbounds i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x float>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{inbounds i8|<{ float, float, i32, [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x float], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag index 8108e0fbf5..03a4d7b21e 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectInt_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
{{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT]], i32 [[TMP2]], i32 [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x 
i32>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: [[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag index 461ebc24c6..fb95138864 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestMixSelectUint_lit.frag @@ -28,17 +28,17 @@ void main() // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 0) // CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(7) [[TMP0]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{4|0, i32 1}} // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(7) [[TMP3]], align 4 -// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} 
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{8|0, i32 2}} // CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(7) [[TMP5]], align 4 // CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP6]], 0 // CHECK-NEXT: [[TMP7:%.*]] = select i1 [[DOTNOT]], i32 [[TMP2]], i32 [[TMP4]] -// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{16|0, i32 4}} // CHECK-NEXT: [[TMP9:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP8]], align 16 -// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{32|0, i32 6}} // CHECK-NEXT: [[TMP11:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP10]], align 16 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds {{i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{inbounds i8|<{ i32, i32, i32, [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32], [[]4 x i8], [[]3 x i32] }>|i8}}, ptr addrspace(7) [[TMP0]], i32 {{48|0, i32 8}} // CHECK-NEXT: [[TMP13:%.*]] = load <3 x i32>, ptr addrspace(7) [[TMP12]], align 16 // CHECK-NEXT: [[TMP14:%.*]] = extractelement <3 x i32> [[TMP13]], i64 1 // CHECK-NEXT: 
[[DOTNOT2:%.*]] = icmp eq i32 [[TMP14]], 0 diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag index 5968ac8f47..f81e8478f0 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTanhFloat_lit.frag @@ -14,13 +14,13 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.tanh.f32(float ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: %{{[0-9]*}} = fmul reassoc nnan nsz arcp contract afn float %{{.*}}, 0x3FF7154760000000 -; SHADERTEST: %{{[0-9]*}} = {{fsub|fneg}} reassoc nnan nsz arcp contract afn float {{(-0.000000e+00, )?}}%{{.*}} +; SHADERTEST: %{{[0-9]*}} = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %{{[0-9]*}}) +; SHADERTEST: %{{[0-9]*}} = fmul reassoc nnan nsz arcp contract afn float %{{.*}}, 0xC007154760000000 ; SHADERTEST: %{{[0-9]*}} = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float %{{.*}}) -; SHADERTEST: %{{[0-9]*}} = fsub reassoc nnan nsz arcp contract afn float %{{.*}}, %{{.*}} -; SHADERTEST: %{{[0-9]*}} = fadd reassoc nnan nsz arcp contract afn float %{{.*}}, %{{.*}} -; SHADERTEST: %{{[0-9]*}} = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fdiv.fast(float %{{.*}}, float %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = fadd reassoc nnan nsz arcp contract afn float %{{.*}}, 1.000000e+00 +; SHADERTEST: %{{[0-9]*}} = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %{{.*}}) +; SHADERTEST: %{{[0-9]*}} = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %{{.*}} +; SHADERTEST: %{{[0-9]*}} = call reassoc nnan nsz arcp contract afn float @llvm.copysign.f32(float %{{.*}}, 
float %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag b/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag index 79ee280562..4ab214d410 100644 --- a/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag +++ b/llpc/test/shaderdb/extensions/OpExtInst_TestTanh_lit.frag @@ -23,20 +23,20 @@ void main() ; SHADERTEST: = call reassoc nnan nsz arcp contract afn float (...) @lgc.create.tanh.f32(float ; SHADERTEST: = call reassoc nnan nsz arcp contract afn <3 x float> (...) @lgc.create.tanh.v3f32(<3 x float> ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results -; SHADERTEST: %{{.*}} = fmul reassoc nnan nsz arcp contract afn float %{{.*}}, 0x3FF7154760000000 -; SHADERTEST: %{{.*}} = {{fsub|fneg}} reassoc nnan nsz arcp contract afn float {{(-0.000000e+00, )?}}%{{.*}} +; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %{{[0-9]*}}) +; SHADERTEST: %{{.*}} = fmul reassoc nnan nsz arcp contract afn float %{{.*}}, 0xC007154760000000 ; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float %{{.*}}) +; SHADERTEST: %{{.*}} = fadd reassoc nnan nsz arcp contract afn float %{{.*}}, 1.000000e+00 +; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %{{.*}}) +; SHADERTEST: %{{.*}} = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %{{.*}} +; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.copysign.f32(float %{{.*}}, float %{{.*}}) +; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.fabs.f32(float %{{[0-9]*}}) +; SHADERTEST: %{{.*}} = fmul reassoc nnan nsz arcp contract afn float %{{.*}}, 0xC007154760000000 ; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float %{{.*}}) -; SHADERTEST: %{{.*}} = fsub reassoc nnan nsz arcp contract afn float %{{.*}}, %{{.*}} -; 
SHADERTEST: %{{.*}} = fadd reassoc nnan nsz arcp contract afn float %{{.*}}, %{{.*}} -; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fdiv.fast(float %{{.*}}, float %{{.*}}) -; SHADERTEST: %{{.*}} = fmul reassoc nnan nsz arcp contract afn float %{{.*}}, 0x3FF7154760000000 -; SHADERTEST: %{{.*}} = {{fsub|fneg}} reassoc nnan nsz arcp contract afn float {{(-0.000000e+00, )?}}%{{.*}} -; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float %{{.*}}) -; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.exp2.f32(float %{{.*}}) -; SHADERTEST: %{{.*}} = fsub reassoc nnan nsz arcp contract afn float %{{.*}}, %{{.*}} -; SHADERTEST: %{{.*}} = fadd reassoc nnan nsz arcp contract afn float %{{.*}}, %{{.*}} -; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fdiv.fast(float %{{.*}}, float %{{.*}}) +; SHADERTEST: %{{.*}} = fadd reassoc nnan nsz arcp contract afn float %{{.*}}, 1.000000e+00 +; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.amdgcn.fdiv.fast(float 2.000000e+00, float %{{.*}}) +; SHADERTEST: %{{.*}} = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %{{.*}} +; SHADERTEST: %{{.*}} = call reassoc nnan nsz arcp contract afn float @llvm.copysign.f32(float %{{.*}}, float %{{.*}}) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/general/ImgDescLoad.comp b/llpc/test/shaderdb/general/ImgDescLoad.comp index ccc4431815..3abbe69a02 100644 --- a/llpc/test/shaderdb/general/ImgDescLoad.comp +++ b/llpc/test/shaderdb/general/ImgDescLoad.comp @@ -4,11 +4,22 @@ /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s -; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: [[SMP_DESC:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 16, !invariant.load -; SHADERTEST: [[IMG_DESC:%[0-9]*]] = load <8 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 32, 
!invariant.load -; SHADERTEST: lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> [[IMG_DESC]], <4 x i32> [[SMP_DESC]], i32 33, <2 x float> zeroinitializer, float 0.000000e+00) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline before-patching results +; SHADERTEST: [[IMG_DESC:%[0-9]*]] = load <8 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 32, !invariant.load +; SHADERTEST: [[SMP_DESC:%[0-9]*]] = load <4 x i32>, ptr addrspace(4) %{{[0-9]*}}, align 16, !invariant.load +; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 0 +; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) +; SHADERTEST: %{{[0-9]*}} = insertelement <4 x i32> poison, i32 %{{[0-9]*}}, i64 0 +; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 1 +; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) +; SHADERTEST: %{{[0-9]*}} = insertelement <4 x i32> %{{[0-9]*}}, i32 %{{[0-9]*}}, i64 1 +; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 2 +; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) +; SHADERTEST: %{{[0-9]*}} = insertelement <4 x i32> %{{[0-9]*}}, i32 %{{[0-9]*}}, i64 2 +; SHADERTEST: %{{[0-9]*}} = extractelement <4 x i32> [[SMP_DESC]], i64 3 +; SHADERTEST: %{{[0-9]*}} = call i32 @llvm.amdgcn.readfirstlane(i32 %{{[0-9]*}}) +; SHADERTEST: [[NEW_SMP_DESC:%[0-9]*]] = insertelement <4 x i32> %{{[0-9]*}}, i32 %{{[0-9]*}}, i64 3 +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, <8 x i32> [[IMG_DESC]], <4 x i32> [[NEW_SMP_DESC]], i1 false, i32 0, i32 0) */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe b/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe index ee39f82720..d4a15e421b 100644 --- a/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe +++ 
b/llpc/test/shaderdb/general/PipelineCs_DebugPrintf.pipe @@ -24,10 +24,9 @@ userDataNode[0].next[0].set = 0xFFFFFFFF userDataNode[0].next[0].binding = 6 ; CHECK-LABEL: @lgc.shader.CS.main( ; CHECK-NEXT: .entry: -; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 4294967295, i32 6, i32 0, i32 2) -; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i32> (...) @lgc.create.read.builtin.input.v3i32(i32 28, i32 0, i32 poison, i32 poison) -; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP1]], i64 0 -; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr addrspace(7) [[TMP0]], ptr addrspace(4) @str, i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]]) -; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr addrspace(7) [[TMP0]], ptr addrspace(4) @str.1, double 1.000000e+00, double 1.000000e+00) +; CHECK-NEXT: [[TMP0:%.*]] = call <3 x i32> (...) @lgc.create.read.builtin.input.v3i32(i32 28, i32 0, i32 poison, i32 poison) +; CHECK-NEXT: [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT:%.*]] = extractelement <3 x i32> [[TMP0]], i64 0 +; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr nonnull @[[GLOB0:[0-9]+]], i32 [[__LLPC_INPUT_PROXY_GL_GLOBALINVOCATIONID_0_VEC_EXTRACT]]) +; CHECK-NEXT: call void (...) @lgc.debug.printf(ptr nonnull @[[GLOB1:[0-9]+]], double 1.000000e+00, double 1.000000e+00) ; CHECK-NEXT: ret void ; diff --git a/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe b/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe index 7ed6a30f8d..d432d16530 100644 --- a/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe +++ b/llpc/test/shaderdb/general/PipelineRays_TestLgcRtTraceRayOp.pipe @@ -1,6 +1,7 @@ ; Check lgc.rt.trace.ray dialect is being generated. 
; BEGIN_SHADERTEST +; REQUIRES: gpurt ; RUN: amdllpc --print-after=llpc-spirv-lower-translator -gfxip 10.3 -o /dev/null 2>&1 %s | FileCheck -check-prefixes=SHADERTEST %s ; SHADERTEST-LABEL: @main( ; SHADERTEST: call void (...) @lgc.rt.trace.ray(i64 %{{[0-9]+}}, i32 0, i32 %{{[0-9]+}}, i32 0, i32 0, i32 0, <3 x float> %{{[0-9]+}}, float %{{[0-9]+}}, <3 x float> %{{[0-9]+}}, float %{{[0-9]+}}, ptr addrspace(5) @RayPayloadKHR0, [1 x i32] [i32 16]) diff --git a/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe b/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe index 0ee38a19af..2854fdab17 100644 --- a/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe +++ b/llpc/test/shaderdb/general/WorkaroundStorageImageFormats.pipe @@ -28,7 +28,6 @@ userDataNode[0].next[0].binding = 0 ; CHECK-NEXT: .entry: ; CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) (...) @lgc.create.get.desc.ptr.p4(i32 1, i32 1, i64 0, i32 0) ; CHECK-NEXT: [[TMP1:%.*]] = call i32 (...) @lgc.create.get.desc.stride.i32(i32 1, i32 1, i64 0, i32 0) -; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP0]], align 32 -; CHECK-NEXT: call void (...) @lgc.create.image.store(<4 x float> , i32 1, i32 512, <8 x i32> [[TMP2]], <2 x i32> ) +; CHECK-NEXT: call void (...) 
@lgc.create.image.store(<4 x float> , i32 1, i32 512, ptr addrspace(4) [[TMP0]], <2 x i32> ) ; CHECK-NEXT: ret void ; diff --git a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe index 074a293e19..4e89bd1ffd 100644 --- a/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe +++ b/llpc/test/shaderdb/gfx10/PipelineVsFs_TestVsOutMiscSideBusEna.pipe @@ -64,7 +64,6 @@ entryPoint = main ; SHADERTEST-NEXT: .pa_cl_clip_cntl: ; SHADERTEST-NEXT: .dx_linear_attr_clip_ena: true ; SHADERTEST-NEXT: .rasterization_kill: false -; SHADERTEST-NEXT: .vs_out_misc_side_bus_ena: true ; SHADERTEST-NEXT: .vte_vport_provoke_disable: false ; SHADERTEST-NEXT: .pa_cl_vs_out_cntl: ; SHADERTEST-NEXT: .clip_dist_ena_0: true @@ -84,6 +83,7 @@ entryPoint = main ; SHADERTEST-NEXT: .cull_dist_ena_6: false ; SHADERTEST-NEXT: .cull_dist_ena_7: false ; SHADERTEST-NEXT: .vs_out_cc_dist0_vec_ena: true +; SHADERTEST-NEXT: .vs_out_misc_side_bus_ena: true ; SHADERTEST-NEXT: .pa_cl_vte_cntl: ; SHADERTEST-NEXT: .vtx_w0_fmt: true ; SHADERTEST-NEXT: .x_offset_ena: true @@ -193,7 +193,8 @@ entryPoint = main ; SHADERTEST-NEXT: .entry_point: _amdgpu_ps_main ; SHADERTEST-NEXT: .float_mode: 0xc0 ; SHADERTEST-NEXT: .ieee_mode: false -; SHADERTEST: .mem_ordered: true +; SHADERTEST-NEXT: .lds_size: 0 +; SHADERTEST-NEXT: .mem_ordered: true ; SHADERTEST-NEXT: .scratch_en: false ; SHADERTEST-NEXT: .scratch_memory_size: 0 ; SHADERTEST-NEXT: .sgpr_count: 0x2 @@ -246,7 +247,8 @@ entryPoint = main ; SHADERTEST-NEXT: .entry_point: _amdgpu_vs_main ; SHADERTEST-NEXT: .float_mode: 0xc0 ; SHADERTEST-NEXT: .ieee_mode: false -; SHADERTEST: .mem_ordered: true +; SHADERTEST-NEXT: .lds_size: 0 +; SHADERTEST-NEXT: .mem_ordered: true ; SHADERTEST-NEXT: .scratch_en: false ; SHADERTEST-NEXT: .scratch_memory_size: 0 ; SHADERTEST-NEXT: .sgpr_count: 0x3 diff --git a/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe 
b/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe index 7aa05e7a74..243b999907 100644 --- a/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe +++ b/llpc/test/shaderdb/gfx11/AttributePrecedesPos.pipe @@ -3,7 +3,7 @@ ; RUN: amdllpc %gfxip %s -v | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: @_amdgpu_gs_main( ; SHADERTEST: call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %{{.*}}, <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0, i32 %{{.*}}, i32 3) -; SHADERTEST: fence release +; SHADERTEST: fence syncscope("agent") release ; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, i1 false, i1 false) ; SHADERTEST: call void @llvm.amdgcn.exp.f32(i32 13, i32 1, float 1.000000e+00, float poison, float poison, float poison, i1 true, i1 false) ; SHADERTEST-LABEL: _amdgpu_gs_main: diff --git a/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp b/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp index 4ca900bf0d..6aad9398cd 100644 --- a/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp +++ b/llpc/test/shaderdb/gfx11/cooperativeMatrix/array-of-matrices.comp @@ -35,7 +35,7 @@ void main() { // CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(4) (...) @lgc.create.load.push.constants.ptr.p4() // CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 0, i32 0, i32 0, i32 2) // CHECK-NEXT: [[LOAD:%.*]] = call <8 x float> (...) @lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP1]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16) -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP1]], i32 512 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(7) [[TMP1]], i32 512 // CHECK-NEXT: [[LOAD2:%.*]] = call <8 x float> (...) 
@lgc.cooperative.matrix.load__v8f32(ptr addrspace(7) [[TMP2]], i32 32, i1 true, i32 1, i32 0, i32 0, i32 16) // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[TMP0]], align 4 // CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 2 @@ -49,13 +49,13 @@ void main() { // CHECK: 10: // CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ 0, [[DOTENTRY:%.*]] ], [ [[TMP9]], [[TMP5]] ] // CHECK-NEXT: store i32 [[TMP11]], ptr addrspace(7) [[TMP1]], align 4 -// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[TMP0]], i64 4 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(4) [[TMP0]], i64 4 // CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(4) [[TMP12]], align 4 // CHECK-NEXT: [[TMP14:%.*]] = icmp ult i32 [[TMP13]], 2 // CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP13]], 1 // CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], <8 x float> [[LOAD2]], <8 x float> [[LOAD]] // CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], <8 x float> [[TMP16]], <8 x float> zeroinitializer -// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP1]], i32 1024 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(7) [[TMP1]], i32 1024 // CHECK-NEXT: call void (...) 
@lgc.cooperative.matrix.store(ptr addrspace(7) [[TMP18]], i32 64, i1 true, i32 1, i32 0, i32 0, i32 16, <8 x float> [[TMP17]]) // CHECK-NEXT: ret void // diff --git a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe index 685133b765..584652edee 100644 --- a/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe +++ b/llpc/test/shaderdb/graphics_library/PipelineVsFs_TestGraphicsLibrary.pipe @@ -20,7 +20,7 @@ colorExport=PipelineLibCes_TestColorExport.pipe ; SHADERTEST-NEXT: [[VERTEXINDEX:%.*]] = add i32 [[TMP3]], [[TMP4]] ; SHADERTEST-NEXT: [[DOTFR:%.*]] = freeze i32 [[VERTEXINDEX]] ; SHADERTEST-NEXT: [[TMP5:%.*]] = icmp slt i32 [[DOTFR]], 3 -; SHADERTEST-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 4 +; SHADERTEST-NEXT: [[TMP6:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(7) [[TMP0]], i32 4 ; SHADERTEST-NEXT: [[DOT0_IN:%.*]] = select i1 [[TMP5]], ptr addrspace(7) [[TMP0]], ptr addrspace(7) [[TMP6]] ; SHADERTEST-NEXT: [[DOT0:%.*]] = load float, ptr addrspace(7) [[DOT0_IN]], align 4 ; SHADERTEST-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[TMP2]], float 1.000000e+00, i64 3 @@ -34,16 +34,16 @@ colorExport=PipelineLibCes_TestColorExport.pipe ; SHADERTEST-NEXT: .entry: ; SHADERTEST-NEXT: [[TMP0:%.*]] = call ptr addrspace(7) @lgc.load.buffer.desc(i64 1, i32 1, i32 0, i32 0) ; SHADERTEST-NEXT: [[TMP1:%.*]] = call ptr @llvm.invariant.start.p7(i64 -1, ptr addrspace(7) [[TMP0]]) -; SHADERTEST-NEXT: [[FRAGCOORD:%.*]] = call <4 x float> @lgc.input.import.builtin.FragCoord.v4f32.i32(i32 15) #[[ATTR3]] +; SHADERTEST-NEXT: [[FRAGCOORD:%.*]] = call <4 x float> @lgc.input.import.builtin.FragCoord.v4f32.i32(i32 15) #[[ATTR4]] ; SHADERTEST-NEXT: [[__LLPC_INPUT_PROXY_GL_FRAGCOORD_4_VEC_EXTRACT:%.*]] = extractelement <4 x float> [[FRAGCOORD]], i64 1 ; SHADERTEST-NEXT: [[TMP2:%.*]] = fadd reassoc nnan nsz arcp contract 
afn float [[__LLPC_INPUT_PROXY_GL_FRAGCOORD_4_VEC_EXTRACT]], -5.000000e-01 ; SHADERTEST-NEXT: [[TMP3:%.*]] = fptosi float [[TMP2]] to i32 ; SHADERTEST-NEXT: [[DOTFR:%.*]] = freeze i32 [[TMP3]] ; SHADERTEST-NEXT: [[TMP4:%.*]] = icmp slt i32 [[DOTFR]], 8 -; SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(7) [[TMP0]], i32 16 +; SHADERTEST-NEXT: [[TMP5:%.*]] = getelementptr {{(inbounds )?}}i8, ptr addrspace(7) [[TMP0]], i32 16 ; SHADERTEST-NEXT: [[DOT0_IN:%.*]] = select i1 [[TMP4]], ptr addrspace(7) [[TMP0]], ptr addrspace(7) [[TMP5]] ; SHADERTEST-NEXT: [[DOT0:%.*]] = load <4 x float>, ptr addrspace(7) [[DOT0_IN]], align 16 -; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[DOT0]]) #[[ATTR4]] +; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[DOT0]]) #[[ATTR3]] ; SHADERTEST-NEXT: ret void ; ; diff --git a/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag b/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag index 0eeac24367..dda5e3a2bf 100644 --- a/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag +++ b/llpc/test/shaderdb/object/ObjFragMask_TestFragFetch_lit.frag @@ -32,11 +32,11 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 6, i32 512, <8 x i32> -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 2, i32 512, <8 x i32> -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 7, i32 512, <8 x i32> -; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 1, i32 544, <8 x i32> -; SHADERTEST: call <4 x i32> (...) 
@lgc.create.image.load.v4i32(i32 6, i32 544, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 6, i32 512, ptr addrspace(4) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 2, i32 512, ptr addrspace(4) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 7, i32 512, ptr addrspace(4) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 1, i32 544, ptr addrspace(4) +; SHADERTEST: call <4 x i32> (...) @lgc.create.image.load.v4i32(i32 6, i32 544, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16 ; SHADERTEST: call i32 @llvm.amdgcn.image.load.3d.i32.i16(i32 1, i16 2, i16 3, i16 1, <8 x i32> %{{.*}}, i32 0, i32 0) diff --git a/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp b/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp index 17520b8f99..365fc5f536 100644 --- a/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp +++ b/llpc/test/shaderdb/object/ObjImage_TestCubeAtomicAdd_lit.comp @@ -16,10 +16,10 @@ void main (void) /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, <8 x i32> +; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results -; SHADERTEST: call i32 (...) @lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, <8 x i32> +; SHADERTEST: call i32 (...) 
@lgc.create.image.atomic.i32(i32 2, i32 8, i32 0, i32 0, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} final pipeline module info ; SHADERTEST: call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32 diff --git a/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag b/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag index d8be3affa1..921561a6de 100644 --- a/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag +++ b/llpc/test/shaderdb/object/ObjImage_TestMemoryQualifier_lit.frag @@ -16,13 +16,13 @@ void main() /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 513, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call void (...) @lgc.create.image.store(<4 x float> %{{[^,]*}}, i32 9, i32 515, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 513, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call void (...) @lgc.create.image.store(<4 x float> %{{[^,]*}}, i32 9, i32 515, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 0, i32 513, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call void (...) @lgc.create.image.store(<4 x float> %{{[^,]*}}, i32 9, i32 515, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 0, i32 513, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call void (...) @lgc.create.image.store(<4 x float> %{{[^,]*}}, i32 9, i32 515, ptr addrspace(4) ; SHADERTEST: AMDLLPC SUCCESS */ // END_SHADERTEST diff --git a/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag b/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag index c2d76316b4..37f246212a 100644 --- a/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag +++ b/llpc/test/shaderdb/object/ObjResource_TestAlias_lit.frag @@ -39,16 +39,15 @@ void main() /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results ; SHADERTEST: call {{.*}} {{.*}}@lgc.load.buffer.desc{{.*}}(i64 0, i32 1,{{.*}} -; SHADERTEST: load <4 x float> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.load.v4f32(i32 1, i32 512, ptr addrspace(4) ; SHADERTEST: AMDLLPC SUCCESS diff --git a/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag b/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag index 39a9031568..d04cd7195d 100644 --- a/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag +++ b/llpc/test/shaderdb/object/ObjSampler_TestLargeId_lit.frag @@ -46,23 +46,23 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC.*}} SPIR-V lowering results -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) -; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, <8 x i32> %{{[-0-9A-Za0z_.]+}}, <4 x i32> %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) 
@lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) +; SHADERTEST: call reassoc nnan nsz arcp contract afn <4 x float> (...) @lgc.create.image.sample.v4f32(i32 1, i32 512, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, ptr addrspace(4) %{{[-0-9A-Za0z_.]+}}, i32 1, <2 x float> zeroinitializer) ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag b/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag index 4636798b25..92b381dded 100644 --- a/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag +++ b/llpc/test/shaderdb/object/ObjSampler_TestSeparateSamplerShadow_lit.frag @@ -14,7 +14,7 @@ void main() /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) 
@lgc.create.image.sample.f32(i32 1, i32 512, <8 x i32> +; SHADERTEST: call reassoc nnan nsz arcp contract afn float (...) @lgc.create.image.sample.f32(i32 1, i32 512, ptr addrspace(4) ; SHADERTEST-LABEL: {{^// LLPC}} pipeline patching results ; SHADERTEST: call {{.*}} float @llvm.amdgcn.image.sample.c.lz.2d.f32.f16(i32 1, float 0.000000e+00, half 0xH0000, half 0xH0000, <8 x i32> %{{.*}}, <4 x i32> %{{.*}}, i1 false, i32 0, i32 0) diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp index 2fac95aef8..f311930853 100644 --- a/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestArrayCopy_lit.comp @@ -30,8 +30,8 @@ void main() ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: @[[LDS:[^ ]*]] = addrspace(3) global { i32, [16 x i32] } -; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) getelementptr inbounds ({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 {{[0-9]*}}) -; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) getelementptr inbounds ({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 {{[0-9]*}}) +; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) getelementptr {{(inbounds )?}}({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 {{[0-9]*}}) +; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) getelementptr {{(inbounds )?}}({ i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 {{[0-9]*}}) ; SHADERTEST: %{{[0-9]*}} = getelementptr { i32, [16 x i32] }, ptr addrspace(3) @[[LDS]], i32 0, i32 1, i32 %{{[0-9]*}} ; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) %{{[0-9]*}} diff --git a/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp b/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp index bddcfcbd43..2623ea3276 100644 --- 
a/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp +++ b/llpc/test/shaderdb/object/ObjSharedVariable_TestArray_lit.comp @@ -22,8 +22,8 @@ void main() ; SHADERTEST: @[[LDS:[^ ]*]] = addrspace(3) global [16 x i32] poison ; SHADERTEST: %{{[0-9]*}} = getelementptr [16 x i32], ptr addrspace(3) @[[LDS]], i32 0, i32 %{{[0-9]*}} ; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) %{{[0-9]*}} -; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) getelementptr inbounds ([16 x i32], ptr addrspace(3) @[[LDS]], i32 0, i32 3) -; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) getelementptr inbounds ([16 x i32], ptr addrspace(3) @[[LDS]], i32 0, i32 4) +; SHADERTEST: store i32 %{{[0-9]*}}, ptr addrspace(3) getelementptr {{(inbounds )?}}([16 x i32], ptr addrspace(3) @[[LDS]], i32 0, i32 3) +; SHADERTEST: %{{[0-9]*}} = load i32, ptr addrspace(3) getelementptr {{(inbounds )?}}([16 x i32], ptr addrspace(3) @[[LDS]], i32 0, i32 4) ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert b/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert index 35540f4a9a..788af1911b 100644 --- a/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert +++ b/llpc/test/shaderdb/object/ObjStorageBlock_TestMultiLevelAccessChain_lit.vert @@ -26,7 +26,7 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results ; SHADERTEST: getelementptr { <4 x float> }, ptr addrspace({{.*}}) %{{[a-z0-9]*}}, i32 0, i32 0 -; SHADERTEST: getelementptr inbounds (<{ [3 x float], [4 x i8], <{ [4 x float] }> }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 2 +; SHADERTEST: getelementptr {{(inbounds )?}}(<{ [3 x float], [4 x i8], <{ [4 x float] }> }>, ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 2 ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: store <4 x float> , diff --git 
a/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag b/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag index ba1b71735e..3b21275d95 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestDirectIndex_lit.frag @@ -17,7 +17,7 @@ void main() /* ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: getelementptr inbounds ([4 x <{ [4 x float], [10 x [4 x float]] }>], ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 3, i32 1, i32 5 +; SHADERTEST: getelementptr {{(inbounds )?}}([4 x <{ [4 x float], [10 x [4 x float]] }>], ptr addrspace({{.*}}) @{{.*}}, i32 0, i32 3, i32 1, i32 5 ; SHADERTEST-LABEL: {{^// LLPC}} SPIR-V lowering results ; SHADERTEST: call {{.*}} {{.*}}@lgc.load.buffer.desc(i64 1, i32 0, i32 0 diff --git a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert index 6aeaafdcdb..cda46c22b6 100644 --- a/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert +++ b/llpc/test/shaderdb/object/ObjUniformBlock_TestLoadMatrixArray_lit.vert @@ -20,14 +20,14 @@ void main() ; RUN: amdllpc -v %gfxip %s | FileCheck -check-prefix=SHADERTEST %s ; SHADERTEST-LABEL: {{^// LLPC}} SPIRV-to-LLVM translation results -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 1, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], 
ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 2, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 3, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 1, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 2, i32 0), align 16 -; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr inbounds (<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 3, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 1, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, 
i32 0, i32 2), i32 0, i32 2, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr ([4 x %llpc.matrix.column], ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2), i32 0, i32 3, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 1, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 2, i32 0), align 16 +; SHADERTEST: %{{[0-9]*}} = load <4 x float>, ptr addrspace(7) getelementptr {{(inbounds )?}}(<{ i32, [12 x i8], [2 x [4 x %{{[a-z.]*}}]] }>, ptr addrspace(7) @{{[a-z0-9]+}}, i32 0, i32 2, i32 1, i32 3, i32 0), align 16 ; SHADERTEST: AMDLLPC SUCCESS */ diff --git a/llpc/test/shaderdb/ray_tracing/lit.local.cfg b/llpc/test/shaderdb/ray_tracing/lit.local.cfg index c839f74489..995b0dc96b 100644 --- a/llpc/test/shaderdb/ray_tracing/lit.local.cfg +++ b/llpc/test/shaderdb/ray_tracing/lit.local.cfg @@ -1,3 +1,5 @@ +if "gpurt" not in config.available_features: + config.unsupported = True # overwrite %gfxip in config.substitutions config.gfxip = '-gfxip=10.3' diff --git a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe index 76845b01dc..b08b250318 100644 --- a/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe +++ b/llpc/test/shaderdb/relocatable_shaders/PipelineVsFs_EnableColorExport.pipe 
@@ -71,79 +71,62 @@ attribute[0].binding = 0 attribute[0].format = VK_FORMAT_R32G32B32A32_SFLOAT attribute[0].offset = 0 -; -; SHADERTEST-LABEL: @_amdgpu_ps_main( +; SHADERTEST-LABEL: @lgc.shader.VS.main( ; SHADERTEST-NEXT: .entry: -; SHADERTEST-NEXT: [[PERSPINTERPCENTER_I1:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER:%.*]], i64 1 -; SHADERTEST-NEXT: [[PERSPINTERPCENTER_I0:%.*]] = extractelement <2 x float> [[PERSPINTERPCENTER]], i64 0 -; SHADERTEST-NEXT: [[TMP11:%.*]] = call i64 @llvm.amdgcn.s.getpc() -; SHADERTEST-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK:%.*]]) -; SHADERTEST-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP16]], float [[PERSPINTERPCENTER_I1]], i32 immarg 0, i32 immarg 0, i32 [[PRIMMASK]]) -; SHADERTEST-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.interp.p1(float [[PERSPINTERPCENTER_I0]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]]) -; SHADERTEST-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.interp.p2(float [[TMP18]], float [[PERSPINTERPCENTER_I1]], i32 immarg 1, i32 immarg 0, i32 [[PRIMMASK]]) -; SHADERTEST-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], -4294967296 -; SHADERTEST-NEXT: [[TMP13:%.*]] = zext i32 [[DESCTABLE0:%.*]] to i64 -; SHADERTEST-NEXT: [[TMP14:%.*]] = or {{(disjoint )?}}i64 [[TMP12]], [[TMP13]] -; SHADERTEST-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr addrspace(4) -; SHADERTEST-NEXT: [[DOTI0:%.*]] = fptosi float [[TMP17]] to i32 -; SHADERTEST-NEXT: [[DOTI1:%.*]] = fptosi float [[TMP19]] to i32 -; SHADERTEST-NEXT: [[TMP20:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP15]], align 32, !invariant.load !{{.*}} -; SHADERTEST-NEXT: [[TMP21:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP15]], align 16, !invariant.load !{{.*}} -; SHADERTEST-NEXT: [[DOTI01:%.*]] = sitofp i32 [[DOTI0]] to float -; SHADERTEST-NEXT: [[DOTI12:%.*]] = sitofp i32 [[DOTI1]] to float -; SHADERTEST-NEXT: [[TMP23:%.*]] = call reassoc nnan nsz 
arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[DOTI01]], float [[DOTI12]], <8 x i32> [[TMP20]], <4 x i32> [[TMP21]], i1 false, i32 0, i32 0) -; SHADERTEST-NEXT: [[TMP25:%.*]] = zext i32 [[COLOREXPADDR:%.*]] to i64 -; SHADERTEST-NEXT: [[TMP26:%.*]] = or {{(disjoint )?}}i64 [[TMP12]], [[TMP25]] -; SHADERTEST-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr addrspace(4) -; SHADERTEST-NEXT: call amdgpu_gfx addrspace(4) void [[TMP27]](<4 x float> [[TMP23]], i32 inreg 0) #[[ATTR1:[0-9]+]] -; SHADERTEST-NEXT: unreachable +; SHADERTEST-NEXT: [[TMP0:%.*]] = call <2 x float> @lgc.input.import.generic__v2f32(i1 false, i32 0, i32 0, i32 0, i32 poison) +; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v2f32(i32 0, i32 0, <2 x float> [[TMP0]]) #[[ATTR1:[0-9]+]] +; SHADERTEST-NEXT: ret void +; ; ; -; SHADERTEST-LABEL: amdgpu_ps_main: -; SHADERTEST: s_getpc_b64 s[6:7] -; SHADERTEST-NEXT: s_mov_b32 s6, s0 -; SHADERTEST-NEXT: s_mov_b32 s32, 0 -; SHADERTEST-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0 -; SHADERTEST-NEXT: s_waitcnt lgkmcnt(0) -; SHADERTEST-NEXT: s_and_b32 s7, s7, 0xffff -; SHADERTEST-NEXT: s_add_u32 s6, s6, s4 -; SHADERTEST-NEXT: s_addc_u32 s7, s7, 0 -; SHADERTEST-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s6 -; SHADERTEST-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s7 -; SHADERTEST-NEXT: s_wqm_b64 exec, exec -; SHADERTEST-NEXT: s_getpc_b64 s[16:17] -; SHADERTEST-NEXT: s_mov_b32 s0, s1 -; SHADERTEST-NEXT: s_mov_b32 m0, s3 -; SHADERTEST-NEXT: s_mov_b32 s1, s17 -; SHADERTEST-NEXT: v_interp_p1_f32_e32 v2, v0, attr0.x -; SHADERTEST-NEXT: v_interp_p1_f32_e32 v0, v0, attr0.y -; SHADERTEST-NEXT: s_clause 0x1 -; SHADERTEST-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 -; SHADERTEST-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x0 -; SHADERTEST-NEXT: s_mov_b32 s3, s17 -; SHADERTEST-NEXT: v_interp_p2_f32_e32 v2, v1, attr0.x -; SHADERTEST-NEXT: v_interp_p2_f32_e32 v0, v1, attr0.y -; SHADERTEST-NEXT: v_cvt_i32_f32_e32 v1, v2 -; 
SHADERTEST-NEXT: v_cvt_i32_f32_e32 v2, v0 -; SHADERTEST-NEXT: v_cvt_f32_i32_e32 v0, v1 -; SHADERTEST-NEXT: v_cvt_f32_i32_e32 v1, v2 -; SHADERTEST-NEXT: s_waitcnt lgkmcnt(0) -; SHADERTEST-NEXT: image_sample v[0:3], v[0:1], s[4:11], s[12:15] dmask:0xf dim:SQ_RSRC_IMG_2D -; SHADERTEST-NEXT: s_mov_b32 s4, 0 -; SHADERTEST-NEXT: s_swappc_b64 s[30:31], s[2:3] +; SHADERTEST-LABEL: @lgc.shader.FS.main( +; SHADERTEST-NEXT: .entry: +; SHADERTEST-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <2 x i32> +; SHADERTEST-NEXT: [[TMP2:%.*]] = call i64 @llvm.amdgcn.s.getpc() +; SHADERTEST-NEXT: [[TMP3:%.*]] = bitcast i64 [[TMP2]] to <2 x i32> +; SHADERTEST-NEXT: [[INTERPPERSPCENTER:%.*]] = call <2 x float> @lgc.input.import.builtin.InterpPerspCenter.v2f32.i32(i32 268435457) #[[ATTR5:[0-9]+]] +; SHADERTEST-NEXT: [[TMP4:%.*]] = call <2 x float> (...) @lgc.input.import.interpolated__v2f32(i1 false, i32 0, i32 0, i32 0, i32 poison, i32 0, <2 x float> [[INTERPPERSPCENTER]]) +; SHADERTEST-NEXT: [[TMP5:%.*]] = call i32 @lgc.load.user.data__i32(i32 44) +; SHADERTEST-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[TMP5]], i64 0 +; SHADERTEST-NEXT: [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to i64 +; SHADERTEST-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP8]], i32 0 +; SHADERTEST-NEXT: [[TMP10:%.*]] = call i32 @lgc.load.user.data__i32(i32 44) +; SHADERTEST-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[TMP10]], i64 0 +; SHADERTEST-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to i64 +; SHADERTEST-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(4) +; SHADERTEST-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP13]], i32 0 +; SHADERTEST-NEXT: [[TMP15:%.*]] = fptosi <2 x float> [[TMP4]] to <2 x i32> +; SHADERTEST-NEXT: [[TMP16:%.*]] = sitofp <2 x i32> [[TMP15]] to <2 x float> +; 
SHADERTEST-NEXT: [[TMP17:%.*]] = load <8 x i32>, ptr addrspace(4) [[TMP14]], align 32, !invariant.load !11 +; SHADERTEST-NEXT: [[TMP18:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP9]], align 16, !invariant.load !11 +; SHADERTEST-NEXT: [[TMP19:%.*]] = extractelement <2 x float> [[TMP16]], i64 0 +; SHADERTEST-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP16]], i64 1 +; SHADERTEST-NEXT: [[TMP21:%.*]] = extractelement <4 x i32> [[TMP18]], i64 0 +; SHADERTEST-NEXT: [[TMP22:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP21]]) +; SHADERTEST-NEXT: [[TMP23:%.*]] = insertelement <4 x i32> poison, i32 [[TMP22]], i64 0 +; SHADERTEST-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP18]], i64 1 +; SHADERTEST-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP24]]) +; SHADERTEST-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i64 1 +; SHADERTEST-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP18]], i64 2 +; SHADERTEST-NEXT: [[TMP28:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP27]]) +; SHADERTEST-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP28]], i64 2 +; SHADERTEST-NEXT: [[TMP30:%.*]] = extractelement <4 x i32> [[TMP18]], i64 3 +; SHADERTEST-NEXT: [[TMP31:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP30]]) +; SHADERTEST-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP31]], i64 3 +; SHADERTEST-NEXT: [[TMP33:%.*]] = call reassoc nnan nsz arcp contract afn <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[TMP19]], float [[TMP20]], <8 x i32> [[TMP17]], <4 x i32> [[TMP32]], i1 false, i32 0, i32 0) +; SHADERTEST-NEXT: call void @lgc.output.export.generic.i32.i32.v4f32(i32 0, i32 0, <4 x float> [[TMP33]]) #[[ATTR6:[0-9]+]] +; SHADERTEST-NEXT: ret void +; ; ; ; SHADERTEST-LABEL: @color_export_shader( -; SHADERTEST-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[TMP0:%.*]], i64 0 -; SHADERTEST-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 
-; SHADERTEST-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 -; SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 -; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], i1 immarg true, i1 immarg true) +; SHADERTEST-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP0:%.*]], i64 0 +; SHADERTEST-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP0]], i64 1 +; SHADERTEST-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP0]], i64 2 +; SHADERTEST-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP0]], i64 3 +; SHADERTEST-NEXT: call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], i1 immarg true, i1 immarg true) #[[ATTR1]] ; SHADERTEST-NEXT: call void @llvm.amdgcn.endpgm() ; SHADERTEST-NEXT: unreachable ; -; SHADERTEST-LABEL: color_export_shader: -; SHADERTEST: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SHADERTEST-NEXT: exp mrt0 v0, v1, v2, v3 done vm -; SHADERTEST-NEXT: s_endpgm -; diff --git a/llpc/tool/amdllpc.cpp b/llpc/tool/amdllpc.cpp index 61211f861b..9515f034b0 100644 --- a/llpc/tool/amdllpc.cpp +++ b/llpc/tool/amdllpc.cpp @@ -680,8 +680,15 @@ static Error processInputs(ICompiler *compiler, InputSpecGroup &inputSpecs, bool const InputSpec &firstInput = inputSpecs.front(); if (isPipelineInfoFile(firstInput.filename)) { + bool unlinked = Unlinked; + + if (firstInput.filename.starts_with("PipelineLib") && !unlinked && !Unlinked.getNumOccurrences()) { + LLPC_WARN("Input filename starts with \"PipelineLib\". 
Assuming you meant -unlinked.\n"); + unlinked = true; + } + compileInfo.autoLayoutDesc = false; - if (Error err = processInputPipeline(compiler, compileInfo, firstInput, Unlinked, IgnoreColorAttachmentFormats)) + if (Error err = processInputPipeline(compiler, compileInfo, firstInput, unlinked, IgnoreColorAttachmentFormats)) return err; if (compileInfo.pipelineType == VfxPipelineTypeGraphicsLibrary) { @@ -792,6 +799,9 @@ static Error processInputs(ICompiler *compiler, InputSpecGroup &inputSpecs, bool dumpOptions->filterPipelineDumpByType = FilterPipelineDumpByType; dumpOptions->filterPipelineDumpByHash = FilterPipelineDumpByHash; dumpOptions->dumpDuplicatePipelines = DumpDuplicatePipelines; + + if (codegen::getFileType() != CodeGenFileType::ObjectFile) + return createResultError(Result::ErrorInvalidValue, "Pipeline dumps require the default (ELF) -filetype"); } std::unique_ptr builder = diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.cpp b/llpc/translator/lib/SPIRV/SPIRVReader.cpp index 9d55f8e7c5..1aeb804992 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.cpp +++ b/llpc/translator/lib/SPIRV/SPIRVReader.cpp @@ -1,4 +1,4 @@ -//===- SPIRVReader.cpp - Converts SPIR-V to LLVM ----------------*- C++ -*-===// +//===- SPIRVReader.cpp - Converts SPIR-V to LLVM ----------------*- C++ -*-===// // // The LLVM/SPIR-V Translator // @@ -277,7 +277,7 @@ SPIRVToLLVM::SPIRVToLLVM(Module *llvmModule, SPIRVModule *theSpirvModule, const const Vkgc::ShaderModuleUsage *moduleUsage, const Vkgc::PipelineShaderOptions *shaderOptions) : m_m(llvmModule), m_builder(builder), m_bm(theSpirvModule), m_entryTarget(nullptr), m_specConstMap(theSpecConstMap), m_convertingSamplers(convertingSamplers), m_dbgTran(m_bm, m_m, this), - m_moduleUsage(reinterpret_cast(moduleUsage)), m_debugOutputBuffer(nullptr), + m_moduleUsage(reinterpret_cast(moduleUsage)), m_shaderOptions(reinterpret_cast(shaderOptions)) { assert(m_m); m_context = &m_m->getContext(); @@ -1085,9 +1085,9 @@ Type 
*SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool colum // image is not an array of three.) Type *imageTy = nullptr; if (st->getDescriptor().Dim == DimBuffer) { - imageTy = getBuilder()->getDescTy(ResourceNodeType::DescriptorTexelBuffer); + imageTy = PointerType::get(*m_context, SPIRAS_Constant); } else { - Type *singleImageTy = getBuilder()->getDescTy(ResourceNodeType::DescriptorResource); + Type *singleImageTy = PointerType::get(*m_context, SPIRAS_Constant); imageTy = ArrayType::get(singleImageTy, 3); if (st->getDescriptor().MS) { // A multisampled image is represented by a struct containing both the @@ -1105,7 +1105,7 @@ Type *SPIRVToLLVM::transTypeImpl(SPIRVType *t, unsigned matrixStride, bool colum // Get sampler type. // A sampler is represented by a struct containing the sampler itself, and the convertingSamplerIdx, an i32 // that is either 0 or the 1-based index into the converting samplers. - Type *ty = getBuilder()->getDescTy(ResourceNodeType::DescriptorSampler); + Type *ty = PointerType::get(*m_context, SPIRAS_Constant); ty = StructType::get(*m_context, {ty, getBuilder()->getInt32Ty()}); if (t->getOpCode() == OpTypeSampledImage) { // A sampledimage is represented by a struct containing the image descriptor @@ -1295,26 +1295,39 @@ Value *SPIRVToLLVM::transValue(SPIRVValue *bv, Function *f, BasicBlock *bb, bool Value *SPIRVToLLVM::transConvertInst(SPIRVValue *bv, Function *f, BasicBlock *bb) { SPIRVUnary *bc = static_cast(bv); + auto srcSpvType = bc->getOperand(0)->getType(); + auto dstSpvType = bc->getType(); auto src = transValue(bc->getOperand(0), f, bb, bb != nullptr); auto srcType = src->getType(); - auto dstType = transType(bc->getType()); + auto dstType = transType(dstSpvType); CastInst::CastOps co = Instruction::BitCast; + // Extension for OGLP: Only valid for bindless texture/image to convert uvec2 to gsampler/gimage + // uniform uvec2 textureHandle; + // vec4 result = texture(sampler2D(textureHandle), texCoord); + bool 
srcTypeUvec2 = srcSpvType->isTypeVectorInt(32) && (srcSpvType->getVectorComponentCount() == 2); + bool bindlessTexture = dstSpvType->isTypeSampledImage() && srcTypeUvec2; + bool bindlessImage = dstSpvType->isTypeImage() && srcTypeUvec2; + + if (bindlessTexture || bindlessImage) { + // 64 bit handle is stored in uvec2, we need to convert texHandle to uint64 at first + Value *imgDescGpuAddress = getBuilder()->CreateBitCast(src, getBuilder()->getInt64Ty()); + return transLoadBindlessImage(dstSpvType, imgDescGpuAddress, bindlessTexture); + } + lgc::CooperativeMatrixElementType srcElemTy = lgc::CooperativeMatrixElementType::Unknown; lgc::CooperativeMatrixElementType dstElemTy = lgc::CooperativeMatrixElementType::Unknown; lgc::CooperativeMatrixLayout srcLayout = lgc::CooperativeMatrixLayout::InvalidLayout; lgc::CooperativeMatrixLayout dstLayout = lgc::CooperativeMatrixLayout::InvalidLayout; if (bv->getType()->isTypeCooperativeMatrixKHR()) { - auto srcCompType = static_cast(bc->getOperand(0)->getType()) - ->getCooperativeMatrixKHRComponentType(); + auto srcCompType = static_cast(srcSpvType)->getCooperativeMatrixKHRComponentType(); srcElemTy = mapToBasicType(srcCompType); - auto dstCompType = - static_cast(bc->getType())->getCooperativeMatrixKHRComponentType(); + auto dstCompType = static_cast(dstSpvType)->getCooperativeMatrixKHRComponentType(); dstElemTy = mapToBasicType(dstCompType); - auto dstUse = static_cast(bc->getType())->getCooperativeMatrixKHRUse(); - unsigned rows = static_cast(bc->getType())->getCooperativeMatrixKHRRows(); - unsigned columns = static_cast(bc->getType())->getCooperativeMatrixKHRColumns(); + auto dstUse = static_cast(dstSpvType)->getCooperativeMatrixKHRUse(); + unsigned rows = static_cast(dstSpvType)->getCooperativeMatrixKHRRows(); + unsigned columns = static_cast(dstSpvType)->getCooperativeMatrixKHRColumns(); dstLayout = getCooperativeMatrixKHRLayout(static_cast(dstUse), dstElemTy, rows, columns); srcLayout = 
getCooperativeMatrixKHRLayout(static_cast(dstUse), srcElemTy, rows, columns); } @@ -2373,12 +2386,16 @@ static SyncScope::ID transScope(LLVMContext &context, const SPIRVConstant *const // Translate memory semantics from SPIR-V to LLVM. // // @param spvMemorySemantics : The semantics to translate. -// @param isAtomicRMW : Is the memory semantic from an atomic rmw operation. -static AtomicOrdering transMemorySemantics(const SPIRVConstant *const spvMemorySemantics, const bool isAtomicRMW) { +// @param readOnly : If the corresponding memory access only read. +// @param writeNone : If the corresponding memory access only write. +static AtomicOrdering transMemorySemantics(const SPIRVConstant *const spvMemorySemantics, const bool readOnly = false, + const bool writeOnly = false) { const unsigned semantics = static_cast(spvMemorySemantics->getZExtIntValue()); + // We are safe to downgrade the SequentiallyConsistent to Acquire/Release/AcquireRelease based on Vulkan validation + // rules within a module. if (semantics & MemorySemanticsSequentiallyConsistentMask) - return AtomicOrdering::SequentiallyConsistent; + return readOnly ? AtomicOrdering::Acquire : writeOnly ? 
AtomicOrdering::Release : AtomicOrdering::AcquireRelease; if (semantics & MemorySemanticsAcquireReleaseMask) return AtomicOrdering::AcquireRelease; if (semantics & MemorySemanticsAcquireMask) @@ -2400,8 +2417,7 @@ Value *SPIRVToLLVM::transAtomicRMW(SPIRVValue *const spvValue, const AtomicRMWIn SPIRVAtomicInstBase *const spvAtomicInst = static_cast(spvValue); const SyncScope::ID scope = transScope(*m_context, static_cast(spvAtomicInst->getOpValue(1))); - const AtomicOrdering ordering = - transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2)), true); + const AtomicOrdering ordering = transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2))); Value *const atomicPointer = transValue(spvAtomicInst->getOpValue(0), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2439,8 +2455,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *c SPIRVAtomicLoad *const spvAtomicLoad = static_cast(spvValue); const SyncScope::ID scope = transScope(*m_context, static_cast(spvAtomicLoad->getOpValue(1))); - const AtomicOrdering ordering = - transMemorySemantics(static_cast(spvAtomicLoad->getOpValue(2)), false); + const AtomicOrdering ordering = transMemorySemantics(static_cast(spvAtomicLoad->getOpValue(2)), + /*readOnly=*/true, /*writeOnly=*/false); Value *const loadPointer = transValue(spvAtomicLoad->getOpValue(0), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2467,8 +2483,8 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue * SPIRVAtomicStore *const spvAtomicStore = static_cast(spvValue); const SyncScope::ID scope = transScope(*m_context, static_cast(spvAtomicStore->getOpValue(1))); - const AtomicOrdering ordering = - transMemorySemantics(static_cast(spvAtomicStore->getOpValue(2)), false); + const AtomicOrdering ordering = transMemorySemantics(static_cast(spvAtomicStore->getOpValue(2)), + /*readOnly=*/false, /*writeOnly=*/true); Value *const storePointer = 
transValue(spvAtomicStore->getOpValue(0), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2666,8 +2682,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVVa SPIRVAtomicInstBase *const spvAtomicInst = static_cast(spvValue); const SyncScope::ID scope = transScope(*m_context, static_cast(spvAtomicInst->getOpValue(1))); - const AtomicOrdering ordering = - transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2)), true); + const AtomicOrdering ordering = transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2))); Value *const atomicPointer = transValue(spvAtomicInst->getOpValue(0), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2694,8 +2709,7 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVVa SPIRVAtomicInstBase *const spvAtomicInst = static_cast(spvValue); const SyncScope::ID scope = transScope(*m_context, static_cast(spvAtomicInst->getOpValue(1))); - const AtomicOrdering ordering = - transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2)), true); + const AtomicOrdering ordering = transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2))); Value *const atomicPointer = transValue(spvAtomicInst->getOpValue(0), getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2724,9 +2738,9 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SP const SyncScope::ID scope = transScope(*m_context, static_cast(spvAtomicInst->getOpValue(1))); const AtomicOrdering successOrdering = - transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2)), true); - const AtomicOrdering failureOrdering = - transMemorySemantics(static_cast(spvAtomicInst->getOpValue(3)), true); + transMemorySemantics(static_cast(spvAtomicInst->getOpValue(2))); + AtomicOrdering failureOrdering = transMemorySemantics(static_cast(spvAtomicInst->getOpValue(3)), + /*readOnly=*/true, /*writeOnly=*/false); Value *const atomicPointer = transValue(spvAtomicInst->getOpValue(0), 
getBuilder()->GetInsertBlock()->getParent(), getBuilder()->GetInsertBlock()); @@ -2977,6 +2991,94 @@ Value *SPIRVToLLVM::transLoadImage(SPIRVValue *spvImageLoadPtr) { return loadImageSampler(elementTy, base); } +// ===================================================================================================================== +// Translate a load for UniformConstant that is image/sampledimage +// +// @param spvElementTy : The image/sampledimage pointer +// @param imgDescGpuAddress : image descriptor's gpu memory address +// @param bindlessTexture : true is bindless texture, false is bindless image +Value *SPIRVToLLVM::transLoadBindlessImage(SPIRVType *spvElementTy, Value *imgDescGpuAddress, bool bindlessTexture) { + + Type *elementTy = transType(spvElementTy, 0, false, false, LayoutMode::Native); + Type *gpuAddrAsPtrTy = getBuilder()->getPtrTy(SPIRAS_Constant); + auto imageDescAddr = getBuilder()->CreateIntToPtr(imgDescGpuAddress, gpuAddrAsPtrTy); + + SPIRVTypeImage *spvImageTy = nullptr; + if (spvElementTy->getOpCode() == OpTypeSampledImage) { + spvImageTy = static_cast(spvElementTy)->getImageType(); + } else { + spvImageTy = static_cast(spvElementTy); + } + + auto desc = spvImageTy->getDescriptor(); + Value *imageDescPtr = nullptr; + + // Handle samplerBuffer or imageBuffer + if (desc.Dim == DimBuffer) { + auto bufferDescStride = getBuilder()->getInt32(DescriptorSizeBuffer); + imageDescPtr = getBuilder()->CreateInsertValue( + PoisonValue::get(StructType::get(*m_context, {imageDescAddr->getType(), bufferDescStride->getType(), + bufferDescStride->getType(), getBuilder()->getInt32Ty()})), + imageDescAddr, 0); + imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, bufferDescStride, 1); + } else { + // The descriptor stride is unimportant for bindless texture/image, just use it as a placeholder + auto imageDescStride = getBuilder()->getInt32(DescriptorSizeResource); + imageDescPtr = getBuilder()->CreateInsertValue( + 
PoisonValue::get(StructType::get(*m_context, {imageDescAddr->getType(), imageDescStride->getType(), + imageDescStride->getType(), getBuilder()->getInt32Ty()})), + imageDescAddr, 0); + + imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, imageDescStride, 1); + imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, getBuilder()->getInt32(DescriptorSizeResource), 2); + imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, getBuilder()->getInt32(1), 3); + } + + // Insert fmask descriptor address into structure + if (desc.MS) { + auto fMaskOffset = getBuilder()->getInt64(DescriptorSizeResource + DescriptorSizeSampler); + constexpr unsigned descriptorSizeFmask = 8 * sizeof(uint32_t); + auto fmaskDescStride = getBuilder()->getInt32(descriptorSizeFmask); + Value *fMaskDescAddr = + getBuilder()->CreateIntToPtr(getBuilder()->CreateAdd(imgDescGpuAddress, fMaskOffset), gpuAddrAsPtrTy); + + auto fmaskDescPtr = getBuilder()->CreateInsertValue( + PoisonValue::get(StructType::get(*m_context, {fMaskDescAddr->getType(), fmaskDescStride->getType(), + fmaskDescStride->getType(), getBuilder()->getInt32Ty()})), + fMaskDescAddr, 0); + fmaskDescPtr = getBuilder()->CreateInsertValue(fmaskDescPtr, fmaskDescStride, 1); + imageDescPtr = getBuilder()->CreateInsertValue( + PoisonValue::get(StructType::get(*m_context, {imageDescPtr->getType(), fmaskDescPtr->getType()})), imageDescPtr, + 0); + imageDescPtr = getBuilder()->CreateInsertValue(imageDescPtr, fmaskDescPtr, 1); + } + + // True for bindless texture, otherwise is bindless image + if (bindlessTexture) { + auto samplerOffset = getBuilder()->getInt64(DescriptorSizeResource); + auto samplerDescStride = getBuilder()->getInt32(DescriptorSizeSampler); + + Value *samplerDescAddr = + getBuilder()->CreateIntToPtr(getBuilder()->CreateAdd(imgDescGpuAddress, samplerOffset), gpuAddrAsPtrTy); + + Type *samplerPtrTy = StructType::get( + *m_context, {samplerDescAddr->getType(), getBuilder()->getInt32Ty(), 
getBuilder()->getInt32Ty()}); + Value *samplerDescPtr = Constant::getNullValue(samplerPtrTy); + + samplerDescPtr = getBuilder()->CreateInsertValue(samplerDescPtr, samplerDescAddr, 0); + samplerDescPtr = getBuilder()->CreateInsertValue(samplerDescPtr, samplerDescStride, 1); + + Value *descPtr = + PoisonValue::get(StructType::get(*m_context, {imageDescPtr->getType(), samplerDescPtr->getType()})); + descPtr = getBuilder()->CreateInsertValue(descPtr, imageDescPtr, 0); + descPtr = getBuilder()->CreateInsertValue(descPtr, samplerDescPtr, 1); + + return loadImageSampler(elementTy, descPtr); + } + + return loadImageSampler(elementTy, imageDescPtr); +} + // ===================================================================================================================== // Generate a load of an image, sampler or sampledimage // @@ -3017,33 +3119,21 @@ Value *SPIRVToLLVM::loadImageSampler(Type *elementTy, Value *base) { // an array of three image descriptors, to allow for multiple planes in YCbCr conversion. Normally we only // load one descriptor; if there are any converting samplers, we load all three, and rely on later optimizations // to remove the unused ones (and thus stop us reading off the end of the descriptor table). - elementTy = arrayTy->getElementType(); - auto *oneVal = getBuilder()->CreateLoad(elementTy, ptr); - oneVal->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(*m_context, {})); - - Value *result = getBuilder()->CreateInsertValue(PoisonValue::get(arrayTy), oneVal, 0); + Value *result = getBuilder()->CreateInsertValue(PoisonValue::get(arrayTy), ptr, 0); // Pointer to image is represented as a struct containing {pointer, stride, planeStride, isResource}. 
if (!m_convertingSamplers.empty() && base->getType()->getStructNumElements() >= 4) { Value *planeStride = getBuilder()->CreateExtractValue(base, 2); Type *ptrTy = ptr->getType(); for (unsigned planeIdx = 1; planeIdx != arrayTy->getNumElements(); ++planeIdx) { - ptr = getBuilder()->CreateBitCast( - ptr, getBuilder()->getInt8Ty()->getPointerTo(ptr->getType()->getPointerAddressSpace())); ptr = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), ptr, planeStride); ptr = getBuilder()->CreateBitCast(ptr, ptrTy); - oneVal = getBuilder()->CreateLoad(elementTy, ptr); - oneVal->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(*m_context, {})); - result = getBuilder()->CreateInsertValue(result, oneVal, planeIdx); + result = getBuilder()->CreateInsertValue(result, ptr, planeIdx); } } return result; } - - // Other cases: Just load the element from the pointer. - auto load = getBuilder()->CreateLoad(elementTy, ptr); - load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(*m_context, {})); - return load; + return ptr; } // ===================================================================================================================== @@ -3070,7 +3160,8 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy spvImagePtr->hasDecorate(DecorationBinding, 0, &binding); bool hasDescriptorSet = spvImagePtr->hasDecorate(DecorationDescriptorSet, 0, &descriptorSet); - assert(!getPipelineOptions()->replaceSetWithResourceType || !hasDescriptorSet || + + assert(!getPipelineOptions()->getGlState().replaceSetWithResourceType || !hasDescriptorSet || static_cast(spvImagePtr->getType())->getStorageClass() == StorageClassUniformConstant); (void)hasDescriptorSet; @@ -3092,7 +3183,7 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy Value *imageDescPtr = nullptr; Value *samplerDescPtr = nullptr; - if (getPipelineOptions()->replaceSetWithResourceType) + if (getPipelineOptions()->getGlState().replaceSetWithResourceType) 
assert(spvTy->getOpCode() != OpTypeSampler); if (spvTy->getOpCode() != OpTypeSampler) { @@ -3106,11 +3197,11 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy auto resType = desc->Dim == DimBuffer ? ResourceNodeType::DescriptorTexelBuffer : ResourceNodeType::DescriptorResource; - if (getPipelineOptions()->replaceSetWithResourceType) { + if (getPipelineOptions()->getGlState().replaceSetWithResourceType) { if (spvTy->getOpCode() == OpTypeImage) { descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorImage); } else if (spvTy->getOpCode() == OpTypeSampledImage) { - if (getPipelineOptions()->enableCombinedTexture) { + if (getPipelineOptions()->getGlState().enableCombinedTexture) { descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorCombinedTexture); } else { @@ -3123,7 +3214,7 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy imageDescPtr = getDescPointerAndStride(resType, descriptorSet, binding, resType); if (desc->MS) { - if (getPipelineOptions()->replaceSetWithResourceType && spvTy->getOpCode() != OpTypeImage) + if (getPipelineOptions()->getGlState().replaceSetWithResourceType && spvTy->getOpCode() != OpTypeImage) descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorFmask); // A multisampled image pointer is a struct containing an image desc pointer and an fmask desc pointer. 
Value *fmaskDescPtr = getDescPointerAndStride(ResourceNodeType::DescriptorFmask, descriptorSet, binding, @@ -3136,7 +3227,8 @@ Value *SPIRVToLLVM::transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *baseTy } if (spvTy->getOpCode() != OpTypeImage) { - if (getPipelineOptions()->replaceSetWithResourceType && !getPipelineOptions()->enableCombinedTexture) + if (getPipelineOptions()->getGlState().replaceSetWithResourceType && + !getPipelineOptions()->getGlState().enableCombinedTexture) descriptorSet = PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorSampler); // Sampler or sampledimage -- need to get the sampler {pointer,stride,convertingSamplerIdx} samplerDescPtr = getDescPointerAndStride(ResourceNodeType::DescriptorSampler, descriptorSet, binding, @@ -3211,7 +3303,7 @@ Value *SPIRVToLLVM::getDescPointerAndStride(ResourceNodeType resType, unsigned d unsigned convertingSamplerIdx = 0; unsigned nextIdx = 1; unsigned convertingSamplerDescriptorSet = descriptorSet; - if (getPipelineOptions()->replaceSetWithResourceType && + if (getPipelineOptions()->getGlState().replaceSetWithResourceType && descriptorSet == PipelineContext::getGlResourceNodeSetFromType(Vkgc::ResourceMappingNodeType::DescriptorSampler)) { // When using 'replaceSetWithResourceType' option (OGL default) it's not possible to match converting samplers @@ -3743,8 +3835,6 @@ Value *SPIRVToLLVM::indexDescPtr(Type *elementTy, Value *base, Value *index) { // Do the indexing operation by GEPping as a byte pointer. 
Type *ptrTy = ptr->getType(); - ptr = getBuilder()->CreateBitCast(ptr, - getBuilder()->getInt8Ty()->getPointerTo(ptr->getType()->getPointerAddressSpace())); ptr = getBuilder()->CreateGEP(getBuilder()->getInt8Ty(), ptr, index); ptr = getBuilder()->CreateBitCast(ptr, ptrTy); base = getBuilder()->CreateInsertValue(base, ptr, 0); @@ -4859,39 +4949,14 @@ template <> Value *SPIRVToLLVM::transValueWithOpcode(SPIRVValue *cons // @param bb : Which basicblock to generate code Value *SPIRVToLLVM::transDebugPrintf(SPIRVInstruction *bi, const ArrayRef spvValues, Function *func, BasicBlock *bb) { - auto resMapping = getPipelineContext()->getResourceMapping(); - unsigned nodeIndex = 0; - if (findResourceNode(resMapping->pUserDataNodes, resMapping->userDataNodeCount, Vkgc::InternalDescriptorSetId, - Vkgc::PrintfBufferBindingId, &nodeIndex) == nullptr) - return getBuilder()->getInt64(0); - - if (!m_debugOutputBuffer) { - auto spvArrType = m_bm->addRuntimeArray(m_bm->addIntegerType(32)); - auto spvStructType = m_bm->addStructType({spvArrType}); - Type *bufType = transType(spvStructType); - - m_debugOutputBuffer = - new GlobalVariable(*m_m, bufType, false, GlobalValue::ExternalLinkage, nullptr, "debugOutputBuffer", nullptr, - GlobalVariable::NotThreadLocal, SPIRAS_Uniform); - - // Setup (desc,binding) resource metadata - auto intType = getBuilder()->getInt32Ty(); - SmallVector resourceMetas = { - ConstantAsMetadata::get(ConstantInt::get(intType, Vkgc::InternalDescriptorSetId)), - ConstantAsMetadata::get(ConstantInt::get(intType, Vkgc::PrintfBufferBindingId)), - ConstantAsMetadata::get(ConstantInt::get(intType, 0))}; - - auto resMdNode = MDNode::get(*m_context, resourceMetas); - m_debugOutputBuffer->addMetadata(gSPIRVMD::Resource, *resMdNode); - } - auto spvValueItr = spvValues.begin(); - Value *formatStr = mapEntry(*spvValueItr++, nullptr); + const SPIRVEntry *spvStrEntry = *spvValueItr++; + auto spvStr = static_cast(spvStrEntry); SmallVector args; for (; spvValueItr != 
spvValues.end(); ++spvValueItr) { args.push_back(transValue(*spvValueItr, func, bb)); } - return getBuilder()->create(m_debugOutputBuffer, formatStr, args); + return getBuilder()->create(spvStr->getStr(), args); } // Translate an initializer. This has special handling for the case where the type to initialize to does not match the @@ -7153,7 +7218,7 @@ static unsigned convertDimension(const SPIRVTypeImageDescriptor *desc) { case Dim1D: return lgc::Builder::Dim1D; case DimBuffer: - return lgc::Builder::Dim1D; + return lgc::Builder::Dim1DBuffer; case Dim2D: return lgc::Builder::Dim2D; case DimRect: @@ -7172,7 +7237,7 @@ static unsigned convertDimension(const SPIRVTypeImageDescriptor *desc) { case Dim1D: return lgc::Builder::Dim1DArray; case DimBuffer: - return lgc::Builder::Dim1DArray; + return lgc::Builder::Dim1DArrayBuffer; case Dim2D: return lgc::Builder::Dim2DArray; case DimCube: @@ -7720,8 +7785,11 @@ Value *SPIRVToLLVM::transSPIRVImageAtomicOpFromInst(SPIRVInstruction *bi, BasicB // Determine the atomic ordering. AtomicOrdering ordering = AtomicOrdering::NotAtomic; if (scope != ScopeInvocation) { + // We are safe to downgrade the SequentiallyConsistent to Acquire/AcquireRelease based on Vulkan validation rules + // within a module. + bool readOnly = bi->getOpCode() == OpAtomicLoad; if (semantics & MemorySemanticsSequentiallyConsistentMask) - ordering = AtomicOrdering::SequentiallyConsistent; + ordering = readOnly ? 
AtomicOrdering::Acquire : AtomicOrdering::AcquireRelease; else if (semantics & MemorySemanticsAcquireReleaseMask) ordering = AtomicOrdering::AcquireRelease; else if (semantics & MemorySemanticsAcquireMask) @@ -8994,7 +9062,7 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { Type *mdTy = nullptr; SPIRVType *bt = bv->getType()->getPointerElementType(); bool vs64BitsAttribInputSingleLoc = (as == SPIRAS_Input && m_execModule == ExecutionModelVertex && - getPipelineOptions()->vertex64BitsAttribSingleLoc); + getPipelineOptions()->getGlState().vertex64BitsAttribSingleLoc); auto md = buildShaderInOutMetadata(bt, inOutDec, mdTy, vs64BitsAttribInputSingleLoc); // Setup input/output metadata @@ -9036,7 +9104,7 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *bv, ArrayRef values) { assert(blockTy->isTypeStruct() || blockTy->isTypeAccelerationStructureKHR() || bv->getType()->getPointerStorageClass() == StorageClassAtomicCounter); - if (getPipelineOptions()->replaceSetWithResourceType) { + if (getPipelineOptions()->getGlState().replaceSetWithResourceType) { bool hasBlock = blockTy->hasDecorate(DecorationBlock); bool hasBufferBlock = blockTy->hasDecorate(DecorationBufferBlock); @@ -9340,6 +9408,14 @@ Constant *SPIRVToLLVM::buildShaderInOutMetadata(SPIRVType *bt, ShaderInOutDecora inOutMd.Component = inOutDec.Component; inOutMd.InterpMode = inOutDec.Interp.Mode; + auto llpcContext = static_cast(m_context); + auto info = static_cast(llpcContext->getPipelineBuildInfo()); + if ((llpcContext->getPipelineType() == PipelineType::Graphics) && info->glState.enableFlatShade && + (inOutMd.Value == Vkgc::GlCompatibilityInOutLocation::FrontColor || + inOutMd.Value == Vkgc::GlCompatibilityInOutLocation::BackColor || + inOutMd.Value == Vkgc::GlCompatibilityInOutLocation::FrontSecondaryColor || + inOutMd.Value == Vkgc::GlCompatibilityInOutLocation::BackSecondaryColor)) + inOutMd.InterpMode = InterpModeFlat; inOutMd.InterpLoc = inOutDec.Interp.Loc; inOutMd.PerPatch = 
inOutDec.PerPatch; inOutMd.PerPrimitive = inOutDec.PerPrimitive; @@ -10418,8 +10494,10 @@ Instruction *SPIRVToLLVM::transBarrier(BasicBlock *bb, SPIRVWord execScope, SPIR Instruction *SPIRVToLLVM::transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRVWord memScope) { AtomicOrdering ordering = AtomicOrdering::NotAtomic; + // We are safe to downgrade the SequentiallyConsistent to AcquireRelease based on Vulkan validation rules within a + // module. if (memSema & MemorySemanticsSequentiallyConsistentMask) - ordering = AtomicOrdering::SequentiallyConsistent; + ordering = AtomicOrdering::AcquireRelease; else if (memSema & MemorySemanticsAcquireReleaseMask) ordering = AtomicOrdering::AcquireRelease; else if (memSema & MemorySemanticsAcquireMask) @@ -10437,10 +10515,6 @@ Instruction *SPIRVToLLVM::transMemFence(BasicBlock *bb, SPIRVWord memSema, SPIRV if (ordering == AtomicOrdering::NotAtomic) return nullptr; - // Upgrade the ordering if we need to make it available or visible - if (memSema & (MemorySemanticsMakeAvailableKHRMask | MemorySemanticsMakeVisibleKHRMask)) - ordering = AtomicOrdering::SequentiallyConsistent; - SyncScope::ID scope = SyncScope::System; switch (memScope) { diff --git a/llpc/translator/lib/SPIRV/SPIRVReader.h b/llpc/translator/lib/SPIRV/SPIRVReader.h index 04e5929cd6..aadf063459 100644 --- a/llpc/translator/lib/SPIRV/SPIRVReader.h +++ b/llpc/translator/lib/SPIRV/SPIRVReader.h @@ -110,6 +110,7 @@ class SPIRVToLLVM { template SmallVector transValueMultiWithOpcode(SPIRVValue *); template SmallVector transValueMultiWithOpcode(SPIRVValue *, Function *f, BasicBlock *bb); Value *transLoadImage(SPIRVValue *spvImageLoadPtr); + Value *transLoadBindlessImage(SPIRVType *spvElementTy, Value *imgDescGpuAddress, bool bindlessTexture); Value *loadImageSampler(Type *elementTy, Value *base); Value *transImagePointer(SPIRVValue *spvImagePtr, SPIRVType *elementTy = nullptr); Value *getDescPointerAndStride(lgc::ResourceNodeType resType, unsigned descriptorSet, unsigned 
binding, @@ -287,7 +288,6 @@ class SPIRVToLLVM { DenseMap, Type *> m_overlappingStructTypeWorkaroundMap; DenseMap m_blockPredecessorToCount; const Vkgc::ShaderModuleUsage *m_moduleUsage; - GlobalVariable *m_debugOutputBuffer; const Vkgc::PipelineShaderOptions *m_shaderOptions; bool m_workaroundStorageImageFormats; diff --git a/llpc/util/llpcDebug.h b/llpc/util/llpcDebug.h index f406c1794d..804574e25a 100644 --- a/llpc/util/llpcDebug.h +++ b/llpc/util/llpcDebug.h @@ -41,6 +41,15 @@ } \ while (false) +// Output error message +#define LLPC_WARN(_msg) \ + do \ + if (Llpc::EnableErrs()) { \ + llvm::outs() << "WARNING: " << _msg; \ + llvm::outs().flush(); \ + } \ + while (false) + // Output general message #define LLPC_OUTS(_msg) \ do \ diff --git a/llpc/util/llpcShaderModuleHelper.cpp b/llpc/util/llpcShaderModuleHelper.cpp index 9f9959f940..4ac381ba2a 100644 --- a/llpc/util/llpcShaderModuleHelper.cpp +++ b/llpc/util/llpcShaderModuleHelper.cpp @@ -151,20 +151,94 @@ ShaderModuleUsage ShaderModuleHelper::getShaderModuleUsageInfo(const BinaryData break; } case BuiltInPointCoord: - case BuiltInPrimitiveId: case BuiltInLayer: case BuiltInClipDistance: case BuiltInCullDistance: { shaderModuleUsage.useGenericBuiltIn = true; break; } + case BuiltInBaryCoordKHR: + case BuiltInBaryCoordNoPerspKHR: { + shaderModuleUsage.useBarycentric = true; + break; + } + case BuiltInPrimitiveId: { + shaderModuleUsage.useGenericBuiltIn = true; + shaderModuleUsage.rtSystemValueUsage.primitive.primitiveIndex = 1; + break; + } + case BuiltInInstanceId: { + shaderModuleUsage.rtSystemValueUsage.primitive.instanceID = 1; + break; + } + case BuiltInLaunchIdKHR: { + shaderModuleUsage.rtSystemValueUsage.ray.launchId = 1; + break; + } + case BuiltInLaunchSizeKHR: { + shaderModuleUsage.rtSystemValueUsage.ray.launchSize = 1; + break; + } + case BuiltInWorldRayOriginKHR: { + shaderModuleUsage.rtSystemValueUsage.ray.worldRayOrigin = 1; + break; + } + case BuiltInWorldRayDirectionKHR: { + 
shaderModuleUsage.rtSystemValueUsage.ray.worldRayDirection = 1; + break; + } + case BuiltInObjectRayOriginKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.objectRayOrigin = 1; + break; + } + case BuiltInObjectRayDirectionKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.objectRayDirection = 1; + break; + } + case BuiltInRayTminKHR: { + shaderModuleUsage.rtSystemValueUsage.ray.tMin = 1; + break; + } + case BuiltInInstanceCustomIndexKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.instanceIndex = 1; + break; + } + case BuiltInObjectToWorldKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.objectToWorld = 1; + break; + } + case BuiltInWorldToObjectKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.worldToObject = 1; + break; + } + case BuiltInHitTNV: { + shaderModuleUsage.rtSystemValueUsage.ray.tCurrent = 1; + break; + } + case BuiltInHitKindKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.hitKind = 1; + break; + } + case BuiltInHitTriangleVertexPositionsKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.hitTrianglePosition = 1; + break; + } + case BuiltInIncomingRayFlagsKHR: { + shaderModuleUsage.rtSystemValueUsage.ray.flags = 1; + break; + } + case BuiltInRayGeometryIndexKHR: { + shaderModuleUsage.rtSystemValueUsage.primitive.geometryIndex = 1; + break; + } default: { break; } } } else if (decoration == DecorationIndex) { hasIndexDecoration = true; - } + } else if (decoration == DecorationPerVertexKHR) + shaderModuleUsage.useBarycentric = true; break; } case OpSpecConstantTrue: diff --git a/llvmraytracing/include/llvmraytracing/Continuations.h b/llvmraytracing/include/llvmraytracing/Continuations.h index a53a01df61..624ec64ced 100644 --- a/llvmraytracing/include/llvmraytracing/Continuations.h +++ b/llvmraytracing/include/llvmraytracing/Continuations.h @@ -72,6 +72,7 @@ #pragma once +#include "TypesMetadata.h" #include "compilerutils/CompilerUtils.h" #include "llvm-dialects/Dialect/Builder.h" #include 
"llvmraytracing/ContinuationsUtil.h" @@ -473,8 +474,4 @@ Function *lowerStructRetArgument(Function *Fn); /// Add necessary continuation transform passes for LGC. void addLgcContinuationTransform(ModulePassManager &MPM); -/// LLVM parser callback which adds !types metadata during DXIL parsing -void DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, - GetTypeByIDTy GetTypeByID, - GetContainedTypeIDTy GetContainedTypeID); } // namespace llvm diff --git a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h index 8fe18562f6..ac1fb42de5 100644 --- a/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h +++ b/llvmraytracing/include/llvmraytracing/ContinuationsUtil.h @@ -173,16 +173,6 @@ class ContFuncTy { void writeMetadata(Function *F); }; -/// Return element type of a function argument resolving opaque pointers -/// via !types metadata where appropriate. -/// Returns nullptr for non-pointers. -Type *getFuncArgPtrElementType(const Argument *Arg); - -/// Return element type of a function argument resolving opaque pointers -/// via !types metadata where appropriate. -/// Returns nullptr for non-pointers. -Type *getFuncArgPtrElementType(const Function *F, int ArgNo); - struct ContSetting { /// A hash value that is used as name. uint64_t NameHash; @@ -272,10 +262,6 @@ class ContHelper { // Marks an await as a waiting one with a wait mask. static constexpr const char *MDIsWaitAwaitName = "continuation.wait.await"; - // Whether this is a load instruction that should translate to a last_use - // load. - static constexpr const char *MDIsLastUseName = "amdgpu.last.use"; - static std::optional extractZExtI32Constant(MDNode *Node) { if (Node) { uint64_t Result = @@ -595,13 +581,6 @@ class ContHelper { CI.setMetadata(ContHelper::MDIsWaitAwaitName, nullptr); } - // Specifies that this is a load that marks a last use of the pointer it loads - // from. 
- static void setIsLastUseLoad(LoadInst &Load) { - Load.setMetadata(ContHelper::MDIsLastUseName, - MDTuple::get(Load.getContext(), {})); - } - /// Returns true if a call to the given function should be rematerialized /// in a shader of the specified kind. /// diff --git a/llvmraytracing/include/llvmraytracing/TypesMetadata.h b/llvmraytracing/include/llvmraytracing/TypesMetadata.h new file mode 100644 index 0000000000..e1db8e80d7 --- /dev/null +++ b/llvmraytracing/include/llvmraytracing/TypesMetadata.h @@ -0,0 +1,49 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + *all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + **********************************************************************************************************************/ + +//===- TypesMetadata.h - Pointee type metadata for processing DXIL ---------==// + +#pragma once + +#include "llvm/Bitcode/BitcodeReader.h" + +namespace llvm { + +/// Return element type of a function argument resolving opaque pointers +/// via !types metadata where appropriate. +/// Returns nullptr for non-pointers. +Type *getFuncArgPtrElementType(const Argument *Arg); + +/// Return element type of a function argument resolving opaque pointers +/// via !types metadata where appropriate. +/// Returns nullptr for non-pointers. +Type *getFuncArgPtrElementType(const Function *F, int ArgNo); + +/// LLVM parser callback which adds !types metadata during DXIL parsing +void DXILValueTypeMetadataCallback(Value *V, unsigned TypeID, + GetTypeByIDTy GetTypeByID, + GetContainedTypeIDTy GetContainedTypeID); + +} // namespace llvm diff --git a/llvmraytracing/lib/Continuations.cpp b/llvmraytracing/lib/Continuations.cpp index 5ecf797ef2..d314c9eb91 100644 --- a/llvmraytracing/lib/Continuations.cpp +++ b/llvmraytracing/lib/Continuations.cpp @@ -1265,6 +1265,8 @@ bool llvm::LgcMaterializable(Instruction &OrigI) { // FIXME: switch to dialectOp check. 
if (CalledName.starts_with("lgc.user.data") || CalledName.starts_with("lgc.shader.input") || + CalledName.starts_with("lgc.create.get.desc.ptr") || + CalledName.starts_with("lgc.load.buffer.desc") || CalledName.starts_with("lgc.load.user.data")) return true; } diff --git a/llvmraytracing/lib/DXILContPostProcess.cpp b/llvmraytracing/lib/DXILContPostProcess.cpp index 56c3c4f05b..ae434696fc 100644 --- a/llvmraytracing/lib/DXILContPostProcess.cpp +++ b/llvmraytracing/lib/DXILContPostProcess.cpp @@ -569,7 +569,7 @@ void DXILContPostProcessPassImpl::handleContStackIntrinsic( Align(CpsStackLowering::getContinuationStackAlignment())); if (FuncName.starts_with("LoadLastUse")) - ContHelper::setIsLastUseLoad(*cast(Replacement)); + CompilerUtils::setIsLastUseLoad(*cast(Replacement)); IsMemoryAccess = true; } else if (FuncName.starts_with("Store")) { diff --git a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll index 5f4629bae2..c8abbcea25 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-get-i32.ll @@ -31,7 +31,7 @@ define void @main() { ; ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; ALL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; ALL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; ALL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 +; ALL-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 ; ALL-NEXT: store i32 [[TMP2]], ptr @debug_global, align 4 ; ALL-NEXT: ret void ; ALL: entry.split: @@ -43,7 +43,7 @@ define void @main() { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; 
LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr addrspace(20) @PAYLOAD, i32 0, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: [[VAL:%.*]] = load i32, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @PAYLOAD, i32 0, i32 5), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: store i32 [[VAL]], ptr @debug_global, align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; diff --git a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll index 14a1f07454..8eb99d7bc7 100644 --- a/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll +++ b/llvmraytracing/test/dx/intrinsics/cont-payload-registers-set-i32.ll @@ -29,7 +29,7 @@ define void @main() { ; ALL-NEXT: store i32 [[CSPINIT]], ptr [[CSP]], align 4 ; ALL-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], 0 ; ALL-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; ALL-NEXT: store i32 42, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 +; ALL-NEXT: store i32 42, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @REGISTERS, i32 0, i32 5), align 4 ; ALL-NEXT: ret void ; ALL: entry.split: ; ALL-NEXT: unreachable @@ -40,7 +40,7 @@ define void @main() { ; LOWERRAYTRACINGPIPELINE-NEXT: [[SYSTEM_DATA_ALLOCA:%.*]] = alloca [[STRUCT_DISPATCHSYSTEMDATA]], align 8 ; LOWERRAYTRACINGPIPELINE-NEXT: store [[STRUCT_DISPATCHSYSTEMDATA]] [[TMP0]], ptr [[SYSTEM_DATA_ALLOCA]], align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: call void @amd.dx.setLocalRootIndex(i32 0) -; LOWERRAYTRACINGPIPELINE-NEXT: store i32 42, ptr addrspace(20) getelementptr inbounds ([30 x i32], ptr 
addrspace(20) @PAYLOAD, i32 0, i32 5), align 4 +; LOWERRAYTRACINGPIPELINE-NEXT: store i32 42, ptr addrspace(20) getelementptr {{(inbounds )?}}([30 x i32], ptr addrspace(20) @PAYLOAD, i32 0, i32 5), align 4 ; LOWERRAYTRACINGPIPELINE-NEXT: ret void ; entry: diff --git a/tool/dumper/vkgcPipelineDumper.cpp b/tool/dumper/vkgcPipelineDumper.cpp index 7ca0eb0c2f..534fce5bc2 100644 --- a/tool/dumper/vkgcPipelineDumper.cpp +++ b/tool/dumper/vkgcPipelineDumper.cpp @@ -936,14 +936,15 @@ void PipelineDumper::dumpPipelineOptions(const PipelineOptions *options, std::os dumpFile << "options.internalRtShaders = " << options->internalRtShaders << "\n"; dumpFile << "options.forceNonUniformResourceIndexStageMask = " << options->forceNonUniformResourceIndexStageMask << "\n"; - dumpFile << "options.replaceSetWithResourceType = " << options->replaceSetWithResourceType << "\n"; - dumpFile << "options.disableSampleMask = " << options->disableSampleMask << "\n"; - dumpFile << "options.buildResourcesDataForShaderModule = " << options->buildResourcesDataForShaderModule << "\n"; - dumpFile << "options.disableTruncCoordForGather = " << options->disableTruncCoordForGather << "\n"; - dumpFile << "options.enableCombinedTexture = " << options->enableCombinedTexture << "\n"; - dumpFile << "options.vertex64BitsAttribSingleLoc = " << options->vertex64BitsAttribSingleLoc << "\n"; - dumpFile << "options.enableFragColor = " << options->enableFragColor << "\n"; - dumpFile << "options.disableBaseVertex = " << options->disableBaseVertex << "\n"; + dumpFile << "options.replaceSetWithResourceType = " << options->getGlState().replaceSetWithResourceType << "\n"; + dumpFile << "options.disableSampleMask = " << options->getGlState().disableSampleMask << "\n"; + dumpFile << "options.buildResourcesDataForShaderModule = " << options->getGlState().buildResourcesDataForShaderModule + << "\n"; + dumpFile << "options.disableTruncCoordForGather = " << options->getGlState().disableTruncCoordForGather << "\n"; + dumpFile 
<< "options.enableCombinedTexture = " << options->getGlState().enableCombinedTexture << "\n"; + dumpFile << "options.vertex64BitsAttribSingleLoc = " << options->getGlState().vertex64BitsAttribSingleLoc << "\n"; + dumpFile << "options.enableFragColor = " << options->getGlState().enableFragColor << "\n"; + dumpFile << "options.disableBaseVertex = " << options->getGlState().disableBaseVertex << "\n"; dumpFile << "options.enablePrimGeneratedQuery = " << options->enablePrimGeneratedQuery << "\n"; dumpFile << "options.disablePerCompFetch = " << options->disablePerCompFetch << "\n"; } @@ -1043,6 +1044,7 @@ void PipelineDumper::dumpGraphicsStateInfo(const GraphicsPipelineBuildInfo *pipe dumpFile << "dynamicTopology = " << pipelineInfo->dynamicTopology << "\n"; dumpFile << "enableColorClampVs = " << pipelineInfo->glState.enableColorClampVs << "\n"; dumpFile << "enableColorClampFs = " << pipelineInfo->glState.enableColorClampFs << "\n"; + dumpFile << "enableFlatShade = " << pipelineInfo->glState.enableFlatShade << "\n"; dumpFile << "originUpperLeft = " << pipelineInfo->getGlState().originUpperLeft << "\n"; if (pipelineInfo->clientMetadataSize > 0) { @@ -1570,6 +1572,7 @@ MetroHash::Hash PipelineDumper::generateHashForGraphicsPipeline(const GraphicsPi hasher.Update(pipeline->glState.enableColorClampVs); hasher.Update(pipeline->glState.enableColorClampFs); + hasher.Update(pipeline->glState.enableFlatShade); MetroHash::Hash hash = {}; hasher.Finalize(hash.bytes); @@ -1873,7 +1876,7 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options } if (stage == UnlinkedStageFragment || stage == UnlinkedStageCount) { hasher->Update(options->enableInterpModePatch); - hasher->Update(options->disableSampleMask); + hasher->Update(options->getGlState().disableSampleMask); } hasher->Update(options->pageMigrationEnabled); hasher->Update(options->optimizationLevel); @@ -1886,13 +1889,13 @@ void PipelineDumper::updateHashForPipelineOptions(const PipelineOptions *options 
hasher->Update(options->reverseThreadGroup); hasher->Update(options->internalRtShaders); hasher->Update(options->forceNonUniformResourceIndexStageMask); - hasher->Update(options->replaceSetWithResourceType); - hasher->Update(options->buildResourcesDataForShaderModule); - hasher->Update(options->disableTruncCoordForGather); - hasher->Update(options->enableCombinedTexture); - hasher->Update(options->vertex64BitsAttribSingleLoc); - hasher->Update(options->enableFragColor); - hasher->Update(options->disableBaseVertex); + hasher->Update(options->getGlState().replaceSetWithResourceType); + hasher->Update(options->getGlState().buildResourcesDataForShaderModule); + hasher->Update(options->getGlState().disableTruncCoordForGather); + hasher->Update(options->getGlState().enableCombinedTexture); + hasher->Update(options->getGlState().vertex64BitsAttribSingleLoc); + hasher->Update(options->getGlState().enableFragColor); + hasher->Update(options->getGlState().disableBaseVertex); hasher->Update(options->enablePrimGeneratedQuery); // disablePerCompFetch has been handled in updateHashForNonFragmentState } @@ -2191,12 +2194,18 @@ template // @param reader : ELF object OStream &operator<<(OStream &out, ElfReader &reader) { unsigned sectionCount = reader.getSectionCount(); + bool sortSection = reader.getMap().size() == sectionCount; char formatBuf[256]; - for (unsigned sortIdx = 0; sortIdx < sectionCount; ++sortIdx) { + for (unsigned idx = 0; idx < sectionCount; ++idx) { typename ElfReader::SectionBuffer *section = nullptr; - unsigned secIdx = 0; - Result result = reader.getSectionDataBySortingIndex(sortIdx, &secIdx, §ion); + Result result = Result::Success; + unsigned secIdx = idx; + if (sortSection) { + result = reader.getSectionDataBySortingIndex(idx, &secIdx, §ion); + } else { + result = reader.getSectionDataBySectionIndex(idx, §ion); + } assert(result == Result::Success); (void(result)); // unused if (strcmp(section->name, ShStrTabName) == 0 || strcmp(section->name, StrTabName) 
== 0 || @@ -2418,7 +2427,7 @@ OStream &operator<<(OStream &out, ElfReader &reader) { while (startPos < section->secHead.sh_size) { if (symIdx < symbols.size()) - endPos = static_cast(symbols[symIdx].value); + endPos = static_cast(std::min(symbols[symIdx].value, section->secHead.sh_size)); else endPos = static_cast(section->secHead.sh_size); @@ -2428,12 +2437,16 @@ OStream &operator<<(OStream &out, ElfReader &reader) { out << " " << symbols[symIdx].pSymName << " (offset = " << symbols[symIdx].value << " size = " << symbols[symIdx].size; - MetroHash::Hash hash = {}; - MetroHash64::Hash( - reinterpret_cast(voidPtrInc(section->data, static_cast(symbols[symIdx].value))), - symbols[symIdx].size, hash.bytes); - uint64_t hashCode64 = MetroHash::compact64(&hash); - snprintf(formatBuf, sizeof(formatBuf), " hash = 0x%016" PRIX64 ")\n", hashCode64); + if ((symbols[symIdx].value + symbols[symIdx].size) <= section->secHead.sh_size) { + MetroHash::Hash hash = {}; + MetroHash64::Hash(reinterpret_cast( + voidPtrInc(section->data, static_cast(symbols[symIdx].value))), + symbols[symIdx].size, hash.bytes); + uint64_t hashCode64 = MetroHash::compact64(&hash); + snprintf(formatBuf, sizeof(formatBuf), " hash = 0x%016" PRIX64 ")\n", hashCode64); + } else { + snprintf(formatBuf, sizeof(formatBuf), " hash = Unknown )\n"); + } out << formatBuf; } ++symIdx; diff --git a/tool/vfx/vfxVkSection.h b/tool/vfx/vfxVkSection.h index 313c79789b..ce4cc75742 100644 --- a/tool/vfx/vfxVkSection.h +++ b/tool/vfx/vfxVkSection.h @@ -882,6 +882,7 @@ class SectionGraphicsState : public Section { INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, cbState, dualSourceBlendDynamic, MemberTypeBool, false); INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, glState, enableColorClampVs, MemberTypeBool, false); INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, glState, enableColorClampFs, MemberTypeBool, false); + INIT_STATE_SUB_MEMBER_NAME_TO_ADDR(SectionGraphicsState, glState, enableFlatShade, 
MemberTypeBool, false); INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, m_colorBuffer, MemberTypeColorBufferItem, Vkgc::MaxColorTargets, true); @@ -896,6 +897,7 @@ class SectionGraphicsState : public Section { INIT_STATE_MEMBER_NAME_TO_ADDR(SectionGraphicsState, useSoftwareVertexBufferDescriptors, MemberTypeBool, false); INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_shaderLibrary, MemberTypeString, false); INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_rtState, MemberTypeRtState, true); + INIT_MEMBER_NAME_TO_ADDR(SectionGraphicsState, m_clientMetadata, MemberTypeU8Array, false); INIT_MEMBER_ARRAY_NAME_TO_ADDR(SectionGraphicsState, m_uniformConstantMaps, MemberTypeUniformConstantMap, Vkgc::ShaderStageGfxCount, true); diff --git a/util/vkgcUtil.cpp b/util/vkgcUtil.cpp index da52ff8df4..8f948b8b13 100644 --- a/util/vkgcUtil.cpp +++ b/util/vkgcUtil.cpp @@ -52,6 +52,14 @@ const char *VKAPI_CALL IUtil::GetEntryPointNameFromSpirvBinary(const BinaryData return getEntryPointNameFromSpirvBinary(spvBin); } +// ===================================================================================================================== +// Translate enum "ResourceMappingNodeType" to string +// +// @param type : Resource map node type +const char *VKAPI_CALL IUtil::GetResourceMappingNodeTypeName(ResourceMappingNodeType type) { + return getResourceMappingNodeTypeName(type); +} + // ===================================================================================================================== // Gets name string of the abbreviation for the specified shader stage // diff --git a/version/include/llpcVersion.h.in b/version/include/llpcVersion.h.in index d947653255..e8d33c7a96 100644 --- a/version/include/llpcVersion.h.in +++ b/version/include/llpcVersion.h.in @@ -37,6 +37,8 @@ // %Version History // | %Version | Change Description | // | -------- | ----------------------------------------------------------------------------------------------------- | +// | 73.0 | Add all the 
ogl specific pipeline options in a new structure GLState | +// | 72.4 | Add enableFlatShade to GraphicsPipelineBuildInfo. | // | 72.3 | Add enableColorClampVs and enableColorClampFs to GraphicsPipelineBuildInfo. | // | 72.2 | Add pGpurtOptions and gpurtOptionCount to RayTracingPipelineBuildInfo | // | 72.1 | Add dynamicTopology to GraphicsPipelineBuildInfo |