From ec7b9b0ce7b899377261bc091865438f9b606571 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 2 Sep 2022 16:22:28 +0200 Subject: [PATCH 1/4] Update SPIR-V headers. --- spirv.h | 23 +++++++++++++++++++++++ spirv.hpp | 23 +++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/spirv.h b/spirv.h index 38f558748..5b6e8aaf4 100644 --- a/spirv.h +++ b/spirv.h @@ -98,6 +98,8 @@ typedef enum SpvExecutionModel_ { SpvExecutionModelMissNV = 5317, SpvExecutionModelCallableKHR = 5318, SpvExecutionModelCallableNV = 5318, + SpvExecutionModelTaskEXT = 5364, + SpvExecutionModelMeshEXT = 5365, SpvExecutionModelMax = 0x7fffffff, } SpvExecutionModel; @@ -165,11 +167,21 @@ typedef enum SpvExecutionMode_ { SpvExecutionModeSignedZeroInfNanPreserve = 4461, SpvExecutionModeRoundingModeRTE = 4462, SpvExecutionModeRoundingModeRTZ = 4463, + SpvExecutionModeEarlyAndLateFragmentTestsAMD = 5017, SpvExecutionModeStencilRefReplacingEXT = 5027, + SpvExecutionModeStencilRefUnchangedFrontAMD = 5079, + SpvExecutionModeStencilRefGreaterFrontAMD = 5080, + SpvExecutionModeStencilRefLessFrontAMD = 5081, + SpvExecutionModeStencilRefUnchangedBackAMD = 5082, + SpvExecutionModeStencilRefGreaterBackAMD = 5083, + SpvExecutionModeStencilRefLessBackAMD = 5084, + SpvExecutionModeOutputLinesEXT = 5269, SpvExecutionModeOutputLinesNV = 5269, + SpvExecutionModeOutputPrimitivesEXT = 5270, SpvExecutionModeOutputPrimitivesNV = 5270, SpvExecutionModeDerivativeGroupQuadsNV = 5289, SpvExecutionModeDerivativeGroupLinearNV = 5290, + SpvExecutionModeOutputTrianglesEXT = 5298, SpvExecutionModeOutputTrianglesNV = 5298, SpvExecutionModePixelInterlockOrderedEXT = 5366, SpvExecutionModePixelInterlockUnorderedEXT = 5367, @@ -219,6 +231,7 @@ typedef enum SpvStorageClass_ { SpvStorageClassShaderRecordBufferNV = 5343, SpvStorageClassPhysicalStorageBuffer = 5349, SpvStorageClassPhysicalStorageBufferEXT = 5349, + SpvStorageClassTaskPayloadWorkgroupEXT = 5402, SpvStorageClassCodeSectionINTEL = 5605, SpvStorageClassDeviceOnlyINTEL = 5936, SpvStorageClassHostOnlyINTEL = 5937, @@ -501,6 +514,7 @@ typedef enum SpvDecoration_ { SpvDecorationPassthroughNV = 5250, SpvDecorationViewportRelativeNV = 5252, SpvDecorationSecondaryViewportRelativeNV = 5256, + SpvDecorationPerPrimitiveEXT = 5271, SpvDecorationPerPrimitiveNV = 5271, SpvDecorationPerViewNV = 5272, SpvDecorationPerTaskNV = 5273, @@ -650,6 +664,10 @@ typedef enum SpvBuiltIn_ { SpvBuiltInFragmentSizeNV = 5292, SpvBuiltInFragInvocationCountEXT = 5293, SpvBuiltInInvocationsPerPixelNV = 5293, + SpvBuiltInPrimitivePointIndicesEXT = 5294, + SpvBuiltInPrimitiveLineIndicesEXT = 5295, + SpvBuiltInPrimitiveTriangleIndicesEXT = 5296, + SpvBuiltInCullPrimitiveEXT = 5299, SpvBuiltInLaunchIdKHR = 5319, SpvBuiltInLaunchIdNV = 5319, SpvBuiltInLaunchSizeKHR = 5320, @@ -990,6 +1008,7 @@ typedef enum SpvCapability_ { SpvCapabilityFragmentFullyCoveredEXT = 5265, SpvCapabilityMeshShadingNV = 5266, SpvCapabilityImageFootprintNV = 5282, + SpvCapabilityMeshShadingEXT = 5283, SpvCapabilityFragmentBarycentricKHR = 5284, SpvCapabilityFragmentBarycentricNV = 5284, SpvCapabilityComputeDerivativeGroupQuadsNV = 5288, @@ -1589,6 +1608,8 @@ typedef enum SpvOp_ { SpvOpFragmentFetchAMD = 5012, SpvOpReadClockKHR = 5056, SpvOpImageSampleFootprintNV = 5283, + SpvOpEmitMeshTasksEXT = 5294, + SpvOpSetMeshOutputsEXT = 5295, SpvOpGroupNonUniformPartitionNV = 5296, SpvOpWritePackedPrimitiveIndices4x8NV = 5299, SpvOpReportIntersectionKHR = 5334, @@ -2262,6 +2283,8 @@ inline void SpvHasResultAndType(SpvOp opcode, bool *hasResult, bool *hasResultTy case SpvOpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; case SpvOpReadClockKHR: *hasResult = true; *hasResultType = true; break; case SpvOpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case SpvOpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case SpvOpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; case SpvOpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; case SpvOpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; case SpvOpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; diff --git a/spirv.hpp b/spirv.hpp index 48d93d64c..e25264af2 100644 --- a/spirv.hpp +++ b/spirv.hpp @@ -94,6 +94,8 @@ enum ExecutionModel { ExecutionModelMissNV = 5317, ExecutionModelCallableKHR = 5318, ExecutionModelCallableNV = 5318, + ExecutionModelTaskEXT = 5364, + ExecutionModelMeshEXT = 5365, ExecutionModelMax = 0x7fffffff, }; @@ -161,11 +163,21 @@ enum ExecutionMode { ExecutionModeSignedZeroInfNanPreserve = 4461, ExecutionModeRoundingModeRTE = 4462, ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeStencilRefUnchangedFrontAMD = 5079, + ExecutionModeStencilRefGreaterFrontAMD = 5080, + ExecutionModeStencilRefLessFrontAMD = 5081, + ExecutionModeStencilRefUnchangedBackAMD = 5082, + ExecutionModeStencilRefGreaterBackAMD = 5083, + ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeOutputLinesEXT = 5269, ExecutionModeOutputLinesNV = 5269, + ExecutionModeOutputPrimitivesEXT = 5270, ExecutionModeOutputPrimitivesNV = 5270, ExecutionModeDerivativeGroupQuadsNV = 5289, ExecutionModeDerivativeGroupLinearNV = 5290, + ExecutionModeOutputTrianglesEXT = 5298, ExecutionModeOutputTrianglesNV = 5298, ExecutionModePixelInterlockOrderedEXT = 5366, ExecutionModePixelInterlockUnorderedEXT = 5367, @@ -215,6 +227,7 @@ enum StorageClass { StorageClassShaderRecordBufferNV = 5343, StorageClassPhysicalStorageBuffer = 5349, StorageClassPhysicalStorageBufferEXT = 5349, + StorageClassTaskPayloadWorkgroupEXT = 5402, StorageClassCodeSectionINTEL = 5605, StorageClassDeviceOnlyINTEL = 5936, StorageClassHostOnlyINTEL = 5937, @@ -497,6 +510,7 @@ enum Decoration { DecorationPassthroughNV = 5250, DecorationViewportRelativeNV = 5252, DecorationSecondaryViewportRelativeNV = 5256, + DecorationPerPrimitiveEXT = 5271, DecorationPerPrimitiveNV = 5271, DecorationPerViewNV = 5272, DecorationPerTaskNV = 5273, @@ -646,6 +660,10 @@ enum BuiltIn { BuiltInFragmentSizeNV = 5292, BuiltInFragInvocationCountEXT = 5293, BuiltInInvocationsPerPixelNV = 5293, + BuiltInPrimitivePointIndicesEXT = 5294, + BuiltInPrimitiveLineIndicesEXT = 5295, + BuiltInPrimitiveTriangleIndicesEXT = 5296, + BuiltInCullPrimitiveEXT = 5299, BuiltInLaunchIdKHR = 5319, BuiltInLaunchIdNV = 5319, BuiltInLaunchSizeKHR = 5320, @@ -986,6 +1004,7 @@ enum Capability { CapabilityFragmentFullyCoveredEXT = 5265, CapabilityMeshShadingNV = 5266, CapabilityImageFootprintNV = 5282, + CapabilityMeshShadingEXT = 5283, CapabilityFragmentBarycentricKHR = 5284, CapabilityFragmentBarycentricNV = 5284, CapabilityComputeDerivativeGroupQuadsNV = 5288, @@ -1585,6 +1604,8 @@ enum Op { OpFragmentFetchAMD = 5012, OpReadClockKHR = 5056, OpImageSampleFootprintNV = 5283, + OpEmitMeshTasksEXT = 5294, + OpSetMeshOutputsEXT = 5295, OpGroupNonUniformPartitionNV = 5296, OpWritePackedPrimitiveIndices4x8NV = 5299, OpReportIntersectionKHR = 5334, @@ -2258,6 +2279,8 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; + case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; From 6a73e68d30d12b06b7b8c6edd36686e0cb3c12d7 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 5 Sep 2022 11:18:10 +0200 Subject: [PATCH 2/4] Update glslang reference. --- checkout_glslang_spirv_tools.sh | 2 +- reference/opt/shaders/frag/barycentric-khr.frag | 2 +- reference/shaders/frag/barycentric-khr.frag | 2 +- shaders/frag/barycentric-khr.frag | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/checkout_glslang_spirv_tools.sh b/checkout_glslang_spirv_tools.sh index 70bb0dfd9..2530e924a 100755 --- a/checkout_glslang_spirv_tools.sh +++ b/checkout_glslang_spirv_tools.sh @@ -2,7 +2,7 @@ # Copyright 2016-2021 The Khronos Group Inc. # SPDX-License-Identifier: Apache-2.0 -GLSLANG_REV=69ae9e7460499b488cb2d32edae623a95264db14 +GLSLANG_REV=a53aa3e94f8b4bf22a6eb6e8a207fe91ab02b989 SPIRV_TOOLS_REV=4c456f7da67c5437a6fb7d4d20d78e2a5ae2acf2 SPIRV_HEADERS_REV=87d5b782bec60822aa878941e6b13c0a9a954c9b PROTOCOL=https diff --git a/reference/opt/shaders/frag/barycentric-khr.frag b/reference/opt/shaders/frag/barycentric-khr.frag index 56eeea82d..71a44c385 100644 --- a/reference/opt/shaders/frag/barycentric-khr.frag +++ b/reference/opt/shaders/frag/barycentric-khr.frag @@ -3,7 +3,7 @@ layout(location = 0) out vec2 value; layout(location = 0) pervertexEXT in vec2 vUV[3]; -layout(location = 1) pervertexEXT in vec2 vUV2[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; void main() { diff --git a/reference/shaders/frag/barycentric-khr.frag b/reference/shaders/frag/barycentric-khr.frag index 56eeea82d..71a44c385 100644 --- a/reference/shaders/frag/barycentric-khr.frag +++ b/reference/shaders/frag/barycentric-khr.frag @@ -3,7 +3,7 @@ layout(location = 0) out vec2 value; layout(location = 0) pervertexEXT in vec2 vUV[3]; -layout(location = 1) pervertexEXT in vec2 vUV2[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; void main() { diff --git a/shaders/frag/barycentric-khr.frag b/shaders/frag/barycentric-khr.frag index b904b66f0..fcaca04e2 100644 --- a/shaders/frag/barycentric-khr.frag +++ b/shaders/frag/barycentric-khr.frag @@ -3,7 +3,7 @@ layout(location = 0) out vec2 value; layout(location = 0) pervertexEXT in vec2 vUV[3]; -layout(location = 1) pervertexEXT in vec2 vUV2[3]; +layout(location = 3) pervertexEXT in vec2 vUV2[3]; void main () { value = gl_BaryCoordEXT.x * vUV[0] + gl_BaryCoordEXT.y * vUV[1] + gl_BaryCoordEXT.z * vUV[2]; From 5762617729b6519998cee11df63c60c94cd50446 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Fri, 2 Sep 2022 16:31:04 +0200 Subject: [PATCH 3/4] GLSL: Implement GL_EXT_mesh_shader. --- ...ader-basic-lines.spv14.vk.nocompat.mesh.vk | 66 +++++++++ ...der-basic-points.spv14.vk.nocompat.mesh.vk | 66 +++++++++ ...r-basic-triangle.spv14.vk.nocompat.mesh.vk | 66 +++++++++ ...ader-basic-lines.spv14.vk.nocompat.mesh.vk | 63 ++++++++ ...der-basic-points.spv14.vk.nocompat.mesh.vk | 63 ++++++++ ...r-basic-triangle.spv14.vk.nocompat.mesh.vk | 63 ++++++++ ...-shader-basic-lines.spv14.vk.nocompat.mesh | 63 ++++++++ ...shader-basic-points.spv14.vk.nocompat.mesh | 63 ++++++++ ...ader-basic-triangle.spv14.vk.nocompat.mesh | 63 ++++++++ spirv_common.hpp | 1 + spirv_cross.cpp | 14 ++ spirv_glsl.cpp | 137 +++++++++++++++--- spirv_glsl.hpp | 4 +- spirv_parser.cpp | 4 + 14 files changed, 715 insertions(+), 21 deletions(-) create mode 100644 reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk create mode 100644 reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk create mode 100644 reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk create mode 100644 reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk create mode 100644 reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk create mode 100644 reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk create mode 100644 shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh create mode 100644 shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh create mode 100644 shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk new file mode 100644 index 000000000..774a27f11 --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, lines) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex); + int _129 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _129; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _129 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _129 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _129 + 3; + } +} + diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk new file mode 100644 index 000000000..bacc7fdfd --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, points) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + int _124 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _124; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _124 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _124 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _124 + 3; + } +} + diff --git a/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk b/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk new file mode 100644 index 000000000..cdd9e1cf3 --- /dev/null +++ b/reference/opt/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,66 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, triangles) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + vec3 _29 = vec3(gl_GlobalInvocationID); + float _31 = _29.x; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(_31, _29.yz, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(_31, _29.yz, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex); + int _128 = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = _128; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = _128 + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = _128 + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = _128 + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk new file mode 100644 index 000000000..20597b986 --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, lines) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0u, 1u) + uvec2(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk new file mode 100644 index 000000000..ecb8285df --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, points) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk b/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk new file mode 100644 index 000000000..e10459d7b --- /dev/null +++ b/reference/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh.vk @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +#extension GL_EXT_fragment_shading_rate : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(max_vertices = 24, max_primitives = 22, triangles) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +layout(location = 0) out vec4 vOut[24]; +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[24]; + +layout(location = 1) perprimitiveEXT out vec4 vPrim[22]; +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[22]; + +taskPayloadSharedEXT TaskPayload payload; +shared float shared_float[16]; + +void main() +{ + SetMeshOutputsEXT(24u, 22u); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(vec3(gl_GlobalInvocationID), 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(vec3(gl_GlobalInvocationID), 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22u) + { + vPrim[gl_LocalInvocationIndex] = vec4(vec3(gl_WorkGroupID), 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0u, 1u, 2u) + uvec3(gl_LocalInvocationIndex); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = (gl_GlobalInvocationID.x & 1u) != 0u; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} + diff --git a/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh new file mode 100644 index 000000000..0f1beef75 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-lines.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(lines, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveLineIndicesEXT[gl_LocalInvocationIndex] = uvec2(0, 1) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh new file mode 100644 index 000000000..3d037bcd5 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-points.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(points, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitivePointIndicesEXT[gl_LocalInvocationIndex] = gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh b/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh new file mode 100644 index 000000000..944525aa5 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-triangle.spv14.vk.nocompat.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 06b1a3d89..c8e748e6f 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -682,6 +682,7 @@ struct SPIREntryPoint } workgroup_size; uint32_t invocations = 0; uint32_t output_vertices = 0; + uint32_t output_primitives = 0; spv::ExecutionModel model = spv::ExecutionModelMax; bool geometry_passthrough = false; }; diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 050c875e8..17072c19a 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -154,6 +154,10 @@ bool Compiler::block_is_pure(const SPIRBlock &block) case OpEmitVertex: return false; + // Mesh shader functions modify global state. + case OpSetMeshOutputsEXT: + return false; + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. case OpControlBarrier: case OpMemoryBarrier: @@ -1069,8 +1073,11 @@ void Compiler::parse_fixup() { auto &var = id.get(); if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassTaskPayloadWorkgroupEXT || var.storage == StorageClassOutput) + { global_variables.push_back(var.self); + } if (variable_storage_is_aliased(var)) aliased_variables.push_back(var.self); } @@ -2177,6 +2184,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.output_vertices = arg0; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = arg0; + break; + default: break; } @@ -2297,6 +2308,9 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t case ExecutionModeOutputVertices: return execution.output_vertices; + case ExecutionModeOutputPrimitivesEXT: + return execution.output_primitives; + default: return 0; } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 0a7237083..e80a6ceae 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -497,6 +497,14 @@ void CompilerGLSL::find_static_extensions() require_extension_internal("GL_NV_ray_tracing"); break; + case ExecutionModelMeshEXT: + if (options.es || options.version < 450) + SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics."); + require_extension_internal("GL_EXT_mesh_shader"); + break; + default: break; } @@ -1060,6 +1068,8 @@ void CompilerGLSL::emit_header() break; case ExecutionModelGLCompute: + case ExecutionModelTaskEXT: + case ExecutionModelMeshEXT: { if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)) { @@ -1078,6 +1088,18 @@ void CompilerGLSL::emit_header() inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } + + if (execution.model == ExecutionModelMeshEXT) + { + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + outputs.push_back(join("max_primitives = ", execution.output_primitives)); + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + outputs.push_back("triangles"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + outputs.push_back("lines"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + } break; } @@ -1235,6 +1257,8 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) res += "sample "; if (flags.get(DecorationInvariant)) res += "invariant "; + if (flags.get(DecorationPerPrimitiveEXT)) + res += "perprimitiveEXT "; if (flags.get(DecorationExplicitInterpAMD)) { @@ -2624,7 +2648,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) } // Workaround to make sure we can emit "patch in/out" correctly. - fixup_io_block_patch_qualifiers(var); + fixup_io_block_patch_primitive_qualifiers(var); // Block names should never alias. auto block_name = to_name(type.self, false); @@ -2647,8 +2671,15 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // Instance names cannot alias block names. resource_names.insert(block_name); - bool is_patch = has_decoration(var.self, DecorationPatch); - statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name); + const char *block_qualifier; + if (has_decoration(var.self, DecorationPatch)) + block_qualifier = "patch "; + else if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + block_qualifier = "perprimitiveEXT "; + else + block_qualifier = ""; + + statement(layout_for_variable(var), block_qualifier, qual, block_name); begin_scope(); type.member_name_cache.clear(); @@ -3084,7 +3115,8 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) }); // If we're declaring clip/cull planes with control points we need to force block declaration. - if (get_execution_model() == ExecutionModelTessellationControl && + if ((get_execution_model() == ExecutionModelTessellationControl || + get_execution_model() == ExecutionModelMeshEXT) && (clip_distance_count || cull_distance_count)) { should_force = true; @@ -3093,7 +3125,7 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) return should_force; } -void CompilerGLSL::fixup_implicit_builtin_block_names() +void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) { ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); @@ -3101,11 +3133,22 @@ void CompilerGLSL::fixup_implicit_builtin_block_names() if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && is_builtin_variable(var)) { - // Make sure the array has a supported name in the code. - if (var.storage == StorageClassOutput) - set_name(var.self, "gl_out"); - else if (var.storage == StorageClassInput) - set_name(var.self, "gl_in"); + if (model != ExecutionModelMeshEXT) + { + // Make sure the array has a supported name in the code. + if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + else + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + set_name(var.self, "gl_MeshPrimitivesEXT"); + else + set_name(var.self, "gl_MeshVerticesEXT"); + } } }); } @@ -3129,6 +3172,11 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; std::unordered_map builtin_xfb_offsets; + const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { + return builtin == BuiltInPosition || builtin == BuiltInPointSize || + builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; + }; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); @@ -3139,7 +3187,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t index = 0; for (auto &m : ir.meta[type.self].members) { - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) @@ -3192,7 +3240,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo { // While we're at it, collect all declared global builtins (HLSL mostly ...). auto &m = ir.meta[var.self].decoration; - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) @@ -3281,7 +3329,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo attr.push_back(join("stream = ", geom_stream)); } - if (!attr.empty()) + if (model == ExecutionModelMeshEXT) + statement("out gl_MeshPerVertexEXT"); + else if (!attr.empty()) statement("layout(", merge(attr), ") out gl_PerVertex"); else statement("out gl_PerVertex"); @@ -3399,7 +3449,8 @@ void CompilerGLSL::emit_resources() case ExecutionModelGeometry: case ExecutionModelTessellationControl: case ExecutionModelTessellationEvaluation: - fixup_implicit_builtin_block_names(); + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); break; default: @@ -3419,6 +3470,7 @@ void CompilerGLSL::emit_resources() break; case ExecutionModelVertex: + case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); break; @@ -8890,6 +8942,15 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); return "gl_FragFullyCoveredNV"; + case BuiltInPrimitiveTriangleIndicesEXT: + return "gl_PrimitiveTriangleIndicesEXT"; + case BuiltInPrimitiveLineIndicesEXT: + return "gl_PrimitiveLineIndicesEXT"; + case BuiltInPrimitivePointIndicesEXT: + return "gl_PrimitivePointIndicesEXT"; + case BuiltInCullPrimitiveEXT: + return "gl_CullPrimitiveEXT"; + default: return join("gl_BuiltIn_", convert_to_string(builtin)); } @@ -9078,14 +9139,19 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // but HLSL seems to just emit straight arrays here. // We must pretend this access goes through gl_in/gl_out arrays // to be able to access certain builtins as arrays. + // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT. auto builtin = ir.meta[base].decoration.builtin_type; + bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; + switch (builtin) { // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. // case BuiltInClipDistance: case BuiltInPosition: case BuiltInPointSize: - if (var->storage == StorageClassInput) + if (mesh_shader) + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassInput) expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); else if (var->storage == StorageClassOutput) expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); @@ -9093,6 +9159,17 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice append_index(index, is_literal); break; + case BuiltInPrimitiveId: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + if (mesh_shader) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + default: append_index(index, is_literal); break; @@ -13510,6 +13587,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } break; + case OpSetMeshOutputsEXT: + statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + default: statement("// unimplemented op ", instruction.op); break; @@ -13818,28 +13899,41 @@ string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } -void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var) +void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) { // Works around weird behavior in glslangValidator where // a patch out block is translated to just block members getting the decoration. // To make glslang not complain when we compile again, we have to transform this back to a case where // the variable itself has Patch decoration, and not members. + // Same for perprimitiveEXT. auto &type = get(var.basetype); if (has_decoration(type.self, DecorationBlock)) { uint32_t member_count = uint32_t(type.member_types.size()); + Decoration promoted_decoration = {}; + bool do_promote_decoration = false; for (uint32_t i = 0; i < member_count; i++) { if (has_member_decoration(type.self, i, DecorationPatch)) { - set_decoration(var.self, DecorationPatch); + promoted_decoration = DecorationPatch; + do_promote_decoration = true; + break; + } + else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + { + promoted_decoration = DecorationPerPrimitiveEXT; + do_promote_decoration = true; break; } } - if (has_decoration(var.self, DecorationPatch)) + if (do_promote_decoration) + { + set_decoration(var.self, promoted_decoration); for (uint32_t i = 0; i < member_count; i++) - unset_member_decoration(type.self, i, DecorationPatch); + unset_member_decoration(type.self, i, promoted_decoration); + } } } @@ -13852,6 +13946,8 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id) if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) res += "shared "; + else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT) + res += "taskPayloadSharedEXT "; res += to_interpolation_qualifiers(flags); if (var) @@ -16337,6 +16433,9 @@ void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr case BuiltInIncomingRayFlagsNV: case BuiltInLaunchIdNV: case BuiltInLaunchSizeNV: + case BuiltInPrimitiveTriangleIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitivePointIndicesEXT: expected_type = SPIRType::UInt; break; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index d967b456b..2d1dad6cf 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -628,7 +628,7 @@ class CompilerGLSL : public Compiler void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration); void emit_buffer_block_legacy(const SPIRVariable &var); void emit_buffer_block_flattened(const SPIRVariable &type); - void fixup_implicit_builtin_block_names(); + void fixup_implicit_builtin_block_names(spv::ExecutionModel model); void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); bool should_force_emit_builtin_block(spv::StorageClass storage); void emit_push_constant_block_vulkan(const SPIRVariable &var); @@ -772,7 +772,7 @@ class CompilerGLSL : public Compiler std::string type_to_glsl_constructor(const SPIRType &type); std::string argument_decl(const SPIRFunction::Parameter &arg); virtual std::string to_qualifiers_glsl(uint32_t id); - void fixup_io_block_patch_qualifiers(const SPIRVariable &var); + void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var); void emit_output_variable_initializer(const SPIRVariable &var); std::string to_precision_qualifiers_glsl(uint32_t id); virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); diff --git a/spirv_parser.cpp b/spirv_parser.cpp index c290a5ebb..526c92cb5 100644 --- a/spirv_parser.cpp +++ b/spirv_parser.cpp @@ -349,6 +349,10 @@ void Parser::parse(const Instruction &instruction) execution.output_vertices = ops[2]; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = ops[2]; + break; + default: break; } From 4c345166dc5d41dc170ab0ad37885fe44b72334c Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Mon, 5 Sep 2022 12:31:22 +0200 Subject: [PATCH 4/4] GLSL: Implement task shaders. Due to bugged glslang / spirv-tools w.r.t. terminator instructions, add a hack to ignore invalid SPIR-V for the time being. --- main.cpp | 4 + ...k-shader-basic-2.vk.spv14.nocompat.task.vk | 42 ++++++ ...ask-shader-basic.vk.spv14.nocompat.task.vk | 35 +++++ ...ader.vk.nocompat.invalid.spv14.asm.task.vk | 35 +++++ ...k-shader-basic-2.vk.spv14.nocompat.task.vk | 42 ++++++ ...ask-shader-basic.vk.spv14.nocompat.task.vk | 35 +++++ ...-shader.vk.nocompat.invalid.spv14.asm.task | 132 ++++++++++++++++++ spirv_common.hpp | 10 +- spirv_cross.cpp | 4 +- spirv_glsl.cpp | 8 ++ spirv_parser.cpp | 21 +++ spirv_parser.hpp | 2 + 12 files changed, 368 insertions(+), 2 deletions(-) create mode 100644 reference/opt/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk create mode 100644 reference/opt/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk create mode 100644 reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task.vk create mode 100644 reference/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk create mode 100644 reference/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk create mode 100644 shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task diff --git a/main.cpp b/main.cpp index 81db89ce0..8d015a6dc 100644 --- a/main.cpp +++ b/main.cpp @@ -1082,6 +1082,10 @@ static ExecutionModel stage_to_execution_model(const std::string &stage) return ExecutionModelMissKHR; else if (stage == "rcall") return ExecutionModelCallableKHR; + else if (stage == "mesh") + return spv::ExecutionModelMeshEXT; + else if (stage == "task") + return spv::ExecutionModelTaskEXT; else SPIRV_CROSS_THROW("Invalid stage."); } diff --git a/reference/opt/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk b/reference/opt/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk new file mode 100644 index 000000000..98704e22d --- /dev/null +++ b/reference/opt/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk @@ -0,0 +1,42 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + if (vs[5] > 20.0) + { + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); + } + else + { + EmitMeshTasksEXT(uint(int(vs[6])), 10u, 50u); + } +} + diff --git a/reference/opt/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk b/reference/opt/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk new file mode 100644 index 000000000..1d491e701 --- /dev/null +++ b/reference/opt/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task.vk b/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task.vk new file mode 100644 index 000000000..1d491e701 --- /dev/null +++ b/reference/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/reference/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk b/reference/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk new file mode 100644 index 000000000..98704e22d --- /dev/null +++ b/reference/shaders/task/task-shader-basic-2.vk.spv14.nocompat.task.vk @@ -0,0 +1,42 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + if (vs[5] > 20.0) + { + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); + } + else + { + EmitMeshTasksEXT(uint(int(vs[6])), 10u, 50u); + } +} + diff --git a/reference/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk b/reference/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk new file mode 100644 index 000000000..1d491e701 --- /dev/null +++ b/reference/shaders/task/task-shader-basic.vk.spv14.nocompat.task.vk @@ -0,0 +1,35 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 4, local_size_y = 3, local_size_z = 2) in; + +struct Payload +{ + float v[3]; +}; + +shared float vs[24]; +taskPayloadSharedEXT Payload p; + +void main() +{ + vs[gl_LocalInvocationIndex] = 10.0; + barrier(); + if (gl_LocalInvocationIndex < 12u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 12u]; + } + barrier(); + if (gl_LocalInvocationIndex < 6u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 6u]; + } + barrier(); + if (gl_LocalInvocationIndex < 3u) + { + vs[gl_LocalInvocationIndex] += vs[gl_LocalInvocationIndex + 3u]; + } + barrier(); + p.v[gl_LocalInvocationIndex] = vs[gl_LocalInvocationIndex]; + EmitMeshTasksEXT(uint(int(vs[4])), uint(int(vs[6])), uint(int(vs[8]))); +} + diff --git a/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task b/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task new file mode 100644 index 000000000..cbef97ed1 --- /dev/null +++ b/shaders-no-opt/asm/task/task-shader.vk.nocompat.invalid.spv14.asm.task @@ -0,0 +1,132 @@ +; SPIR-V +; Version: 1.4 +; Generator: Khronos Glslang Reference Front End; 10 +; Bound: 93 +; Schema: 0 + OpCapability MeshShadingEXT + OpExtension "SPV_EXT_mesh_shader" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint TaskEXT %main "main" %vs %gl_LocalInvocationIndex %p + OpExecutionMode %main LocalSize 4 3 2 + OpSource GLSL 450 + OpSourceExtension "GL_EXT_mesh_shader" + OpName %main "main" + OpName %vs "vs" + OpName %gl_LocalInvocationIndex "gl_LocalInvocationIndex" + OpName %Payload "Payload" + OpMemberName %Payload 0 "v" + OpName %p "p" + OpDecorate %gl_LocalInvocationIndex BuiltIn LocalInvocationIndex + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %uint = OpTypeInt 32 0 + %uint_24 = OpConstant %uint 24 +%_arr_float_uint_24 = OpTypeArray %float %uint_24 +%_ptr_Workgroup__arr_float_uint_24 = OpTypePointer Workgroup %_arr_float_uint_24 + %vs = OpVariable %_ptr_Workgroup__arr_float_uint_24 Workgroup +%_ptr_Input_uint = OpTypePointer Input %uint +%gl_LocalInvocationIndex = OpVariable %_ptr_Input_uint Input + %float_10 = OpConstant %float 10 +%_ptr_Workgroup_float = OpTypePointer Workgroup %float + %uint_2 = OpConstant %uint 2 + %uint_264 = OpConstant %uint 264 + %uint_12 = OpConstant %uint 12 + %bool = OpTypeBool + %uint_6 = OpConstant %uint 6 + %uint_3 = OpConstant %uint 3 +%_arr_float_uint_3 = OpTypeArray %float %uint_3 + %Payload = OpTypeStruct %_arr_float_uint_3 +%_ptr_TaskPayloadWorkgroupEXT_Payload = OpTypePointer TaskPayloadWorkgroupEXT %Payload + %p = OpVariable %_ptr_TaskPayloadWorkgroupEXT_Payload TaskPayloadWorkgroupEXT + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 +%_ptr_TaskPayloadWorkgroupEXT_float = OpTypePointer TaskPayloadWorkgroupEXT %float + %int_4 = OpConstant %int 4 + %int_6 = OpConstant %int 6 + %int_8 = OpConstant %int 8 + %v3uint = OpTypeVector %uint 3 + %uint_4 = OpConstant %uint 4 +%gl_WorkGroupSize = OpConstantComposite %v3uint %uint_4 %uint_3 %uint_2 + %main = OpFunction %void None %3 + %5 = OpLabel + %14 = OpLoad %uint %gl_LocalInvocationIndex + %17 = OpAccessChain %_ptr_Workgroup_float %vs %14 + OpStore %17 %float_10 + OpControlBarrier %uint_2 %uint_2 %uint_264 + %20 = OpLoad %uint %gl_LocalInvocationIndex + %23 = OpULessThan %bool %20 %uint_12 + OpSelectionMerge %25 None + OpBranchConditional %23 %24 %25 + %24 = OpLabel + %26 = OpLoad %uint %gl_LocalInvocationIndex + %27 = OpLoad %uint %gl_LocalInvocationIndex + %28 = OpIAdd %uint %27 %uint_12 + %29 = OpAccessChain %_ptr_Workgroup_float %vs %28 + %30 = OpLoad %float %29 + %31 = OpAccessChain %_ptr_Workgroup_float %vs %26 + %32 = OpLoad %float %31 + %33 = OpFAdd %float %32 %30 + %34 = OpAccessChain %_ptr_Workgroup_float %vs %26 + OpStore %34 %33 + OpBranch %25 + %25 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %35 = OpLoad %uint %gl_LocalInvocationIndex + %37 = OpULessThan %bool %35 %uint_6 + OpSelectionMerge %39 None + OpBranchConditional %37 %38 %39 + %38 = OpLabel + %40 = OpLoad %uint %gl_LocalInvocationIndex + %41 = OpLoad %uint %gl_LocalInvocationIndex + %42 = OpIAdd %uint %41 %uint_6 + %43 = OpAccessChain %_ptr_Workgroup_float %vs %42 + %44 = OpLoad %float %43 + %45 = OpAccessChain %_ptr_Workgroup_float %vs %40 + %46 = OpLoad %float %45 + %47 = OpFAdd %float %46 %44 + %48 = OpAccessChain %_ptr_Workgroup_float %vs %40 + OpStore %48 %47 + OpBranch %39 + %39 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %49 = OpLoad %uint %gl_LocalInvocationIndex + %51 = OpULessThan %bool %49 %uint_3 + OpSelectionMerge %53 None + OpBranchConditional %51 %52 %53 + %52 = OpLabel + %54 = OpLoad %uint %gl_LocalInvocationIndex + %55 = OpLoad %uint %gl_LocalInvocationIndex + %56 = OpIAdd %uint %55 %uint_3 + %57 = OpAccessChain %_ptr_Workgroup_float %vs %56 + %58 = OpLoad %float %57 + %59 = OpAccessChain %_ptr_Workgroup_float %vs %54 + %60 = OpLoad %float %59 + %61 = OpFAdd %float %60 %58 + %62 = OpAccessChain %_ptr_Workgroup_float %vs %54 + OpStore %62 %61 + OpBranch %53 + %53 = OpLabel + OpControlBarrier %uint_2 %uint_2 %uint_264 + %69 = OpLoad %uint %gl_LocalInvocationIndex + %70 = OpLoad %uint %gl_LocalInvocationIndex + %71 = OpAccessChain %_ptr_Workgroup_float %vs %70 + %72 = OpLoad %float %71 + %74 = OpAccessChain %_ptr_TaskPayloadWorkgroupEXT_float %p %int_0 %69 + OpStore %74 %72 + %76 = OpAccessChain %_ptr_Workgroup_float %vs %int_4 + %77 = OpLoad %float %76 + %78 = OpConvertFToS %int %77 + %79 = OpBitcast %uint %78 + %81 = OpAccessChain %_ptr_Workgroup_float %vs %int_6 + %82 = OpLoad %float %81 + %83 = OpConvertFToS %int %82 + %84 = OpBitcast %uint %83 + %86 = OpAccessChain %_ptr_Workgroup_float %vs %int_8 + %87 = OpLoad %float %86 + %88 = OpConvertFToS %int %87 + %89 = OpBitcast %uint %88 + OpEmitMeshTasksEXT %79 %84 %89 %p + OpFunctionEnd diff --git a/spirv_common.hpp b/spirv_common.hpp index c8e748e6f..5c2ad7476 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -777,7 +777,8 @@ struct SPIRBlock : IVariant Unreachable, // Noop Kill, // Discard IgnoreIntersection, // Ray Tracing - TerminateRay // Ray Tracing + TerminateRay, // Ray Tracing + EmitMeshTasks // Mesh shaders }; enum Merge @@ -839,6 +840,13 @@ struct SPIRBlock : IVariant BlockID false_block = 0; BlockID default_block = 0; + // If terminator is EmitMeshTasksEXT. + struct + { + ID groups[3]; + ID payload; + } mesh = {}; + SmallVector ops; struct Phi diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 17072c19a..04ea35fa5 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -98,7 +98,8 @@ bool Compiler::block_is_pure(const SPIRBlock &block) // This is a global side effect of the function. if (block.terminator == SPIRBlock::Kill || block.terminator == SPIRBlock::TerminateRay || - block.terminator == SPIRBlock::IgnoreIntersection) + block.terminator == SPIRBlock::IgnoreIntersection || + block.terminator == SPIRBlock::EmitMeshTasks) return false; for (auto &i : block.ops) @@ -155,6 +156,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block) return false; // Mesh shader functions modify global state. + // (EmitMeshTasks is a terminator). case OpSetMeshOutputsEXT: return false; diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index e80a6ceae..bcd4f911c 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -498,6 +498,7 @@ void CompilerGLSL::find_static_extensions() break; case ExecutionModelMeshEXT: + case ExecutionModelTaskEXT: if (options.es || options.version < 450) SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); if (!options.vulkan_semantics) @@ -16105,6 +16106,13 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) statement("terminateRayEXT;"); break; + case SPIRBlock::EmitMeshTasks: + statement("EmitMeshTasksEXT(", + to_unpacked_expression(block.mesh.groups[0]), ", ", + to_unpacked_expression(block.mesh.groups[1]), ", ", + to_unpacked_expression(block.mesh.groups[2]), ");"); + break; + default: SPIRV_CROSS_THROW("Unimplemented block terminator."); } diff --git a/spirv_parser.cpp b/spirv_parser.cpp index 526c92cb5..49eb1933c 100644 --- a/spirv_parser.cpp +++ b/spirv_parser.cpp @@ -183,6 +183,15 @@ void Parser::parse(const Instruction &instruction) auto op = static_cast(instruction.op); uint32_t length = instruction.length; + // HACK for glslang that might emit OpEmitMeshTasksEXT followed by return / branch. + // Instead of failing hard, just ignore it. + if (ignore_trailing_block_opcodes) + { + ignore_trailing_block_opcodes = false; + if (op == OpReturn || op == OpBranch || op == OpUnreachable) + return; + } + switch (op) { case OpSourceContinued: @@ -1107,6 +1116,18 @@ void Parser::parse(const Instruction &instruction) current_block = nullptr; break; + case OpEmitMeshTasksEXT: + if (!current_block) + SPIRV_CROSS_THROW("Trying to end a non-existing block."); + current_block->terminator = SPIRBlock::EmitMeshTasks; + for (uint32_t i = 0; i < 3; i++) + current_block->mesh.groups[i] = ops[i]; + current_block->mesh.payload = length >= 4 ? ops[3] : 0; + current_block = nullptr; + // Currently glslang is bugged and does not treat EmitMeshTasksEXT as a terminator. + ignore_trailing_block_opcodes = true; + break; + case OpReturn: { if (!current_block) diff --git a/spirv_parser.hpp b/spirv_parser.hpp index d72fc71d8..dabc0e224 100644 --- a/spirv_parser.hpp +++ b/spirv_parser.hpp @@ -46,6 +46,8 @@ class Parser ParsedIR ir; SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; + // For workarounds. + bool ignore_trailing_block_opcodes = false; void parse(const Instruction &instr); const uint32_t *stream(const Instruction &instr) const;