diff --git a/shaders/mesh/mesh-shader-basic-triangle.mesh b/shaders/mesh/mesh-shader-basic-triangle.mesh new file mode 100644 index 000000000..944525aa5 --- /dev/null +++ b/shaders/mesh/mesh-shader-basic-triangle.mesh @@ -0,0 +1,63 @@ +#version 450 +#extension GL_EXT_mesh_shader : require +layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in; +layout(triangles, max_vertices = 24, max_primitives = 22) out; + +out gl_MeshPerVertexEXT +{ + vec4 gl_Position; + float gl_PointSize; + float gl_ClipDistance[1]; + float gl_CullDistance[2]; +} gl_MeshVerticesEXT[]; + +layout(location = 0) out vec4 vOut[]; +layout(location = 1) perprimitiveEXT out vec4 vPrim[]; + +layout(location = 2) out BlockOut +{ + vec4 a; + vec4 b; +} outputs[]; + +layout(location = 4) perprimitiveEXT out BlockOutPrim +{ + vec4 a; + vec4 b; +} prim_outputs[]; + +shared float shared_float[16]; + +struct TaskPayload +{ + float a; + float b; + int c; +}; + +taskPayloadSharedEXT TaskPayload payload; + +void main() +{ + SetMeshOutputsEXT(24, 22); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(gl_GlobalInvocationID, 1.0); + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_PointSize = 2.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_ClipDistance[0] = 4.0; + gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_CullDistance[1] = 5.0; + vOut[gl_LocalInvocationIndex] = vec4(gl_GlobalInvocationID, 2.0); + outputs[gl_LocalInvocationIndex].a = vec4(5.0); + outputs[gl_LocalInvocationIndex].b = vec4(6.0); + barrier(); + if (gl_LocalInvocationIndex < 22) + { + vPrim[gl_LocalInvocationIndex] = vec4(gl_WorkGroupID, 3.0); + prim_outputs[gl_LocalInvocationIndex].a = vec4(payload.a); + prim_outputs[gl_LocalInvocationIndex].b = vec4(payload.b); + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2) + gl_LocalInvocationIndex; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveID = int(gl_GlobalInvocationID.x); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_Layer = int(gl_GlobalInvocationID.x) + 1; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_ViewportIndex = int(gl_GlobalInvocationID.x) + 2; + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_CullPrimitiveEXT = bool(gl_GlobalInvocationID.x & 1); + gl_MeshPrimitivesEXT[gl_LocalInvocationIndex].gl_PrimitiveShadingRateEXT = int(gl_GlobalInvocationID.x) + 3; + } +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 06b1a3d89..c8e748e6f 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -682,6 +682,7 @@ struct SPIREntryPoint } workgroup_size; uint32_t invocations = 0; uint32_t output_vertices = 0; + uint32_t output_primitives = 0; spv::ExecutionModel model = spv::ExecutionModelMax; bool geometry_passthrough = false; }; diff --git a/spirv_cross.cpp b/spirv_cross.cpp index 050c875e8..17072c19a 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -154,6 +154,10 @@ bool Compiler::block_is_pure(const SPIRBlock &block) case OpEmitVertex: return false; + // Mesh shader functions modify global state. + case OpSetMeshOutputsEXT: + return false; + // Barriers disallow any reordering, so we should treat blocks with barrier as writing. case OpControlBarrier: case OpMemoryBarrier: @@ -1069,8 +1073,11 @@ void Compiler::parse_fixup() { auto &var = id.get(); if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup || + var.storage == StorageClassTaskPayloadWorkgroupEXT || var.storage == StorageClassOutput) + { global_variables.push_back(var.self); + } if (variable_storage_is_aliased(var)) aliased_variables.push_back(var.self); } @@ -2177,6 +2184,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar execution.output_vertices = arg0; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = arg0; + break; + default: break; } @@ -2297,6 +2308,9 @@ uint32_t Compiler::get_execution_mode_argument(spv::ExecutionMode mode, uint32_t case ExecutionModeOutputVertices: return execution.output_vertices; + case ExecutionModeOutputPrimitivesEXT: + return execution.output_primitives; + default: return 0; } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 0a7237083..e80a6ceae 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -497,6 +497,14 @@ void CompilerGLSL::find_static_extensions() require_extension_internal("GL_NV_ray_tracing"); break; + case ExecutionModelMeshEXT: + if (options.es || options.version < 450) + SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics."); + require_extension_internal("GL_EXT_mesh_shader"); + break; + default: break; } @@ -1060,6 +1068,8 @@ void CompilerGLSL::emit_header() break; case ExecutionModelGLCompute: + case ExecutionModelTaskEXT: + case ExecutionModelMeshEXT: { if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)) { @@ -1078,6 +1088,18 @@ void CompilerGLSL::emit_header() inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); } + + if (execution.model == ExecutionModelMeshEXT) + { + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + outputs.push_back(join("max_primitives = ", execution.output_primitives)); + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + outputs.push_back("triangles"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + outputs.push_back("lines"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + } break; } @@ -1235,6 +1257,8 @@ string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) res += "sample "; if (flags.get(DecorationInvariant)) res += "invariant "; + if (flags.get(DecorationPerPrimitiveEXT)) + res += "perprimitiveEXT "; if (flags.get(DecorationExplicitInterpAMD)) { @@ -2624,7 +2648,7 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) } // Workaround to make sure we can emit "patch in/out" correctly. - fixup_io_block_patch_qualifiers(var); + fixup_io_block_patch_primitive_qualifiers(var); // Block names should never alias. auto block_name = to_name(type.self, false); @@ -2647,8 +2671,15 @@ void CompilerGLSL::emit_interface_block(const SPIRVariable &var) // Instance names cannot alias block names. resource_names.insert(block_name); - bool is_patch = has_decoration(var.self, DecorationPatch); - statement(layout_for_variable(var), (is_patch ? "patch " : ""), qual, block_name); + const char *block_qualifier; + if (has_decoration(var.self, DecorationPatch)) + block_qualifier = "patch "; + else if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + block_qualifier = "perprimitiveEXT "; + else + block_qualifier = ""; + + statement(layout_for_variable(var), block_qualifier, qual, block_name); begin_scope(); type.member_name_cache.clear(); @@ -3084,7 +3115,8 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) }); // If we're declaring clip/cull planes with control points we need to force block declaration. - if (get_execution_model() == ExecutionModelTessellationControl && + if ((get_execution_model() == ExecutionModelTessellationControl || + get_execution_model() == ExecutionModelMeshEXT) && (clip_distance_count || cull_distance_count)) { should_force = true; @@ -3093,7 +3125,7 @@ bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) return should_force; } -void CompilerGLSL::fixup_implicit_builtin_block_names() +void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) { ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); @@ -3101,11 +3133,22 @@ void CompilerGLSL::fixup_implicit_builtin_block_names() if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && is_builtin_variable(var)) { - // Make sure the array has a supported name in the code. - if (var.storage == StorageClassOutput) - set_name(var.self, "gl_out"); - else if (var.storage == StorageClassInput) - set_name(var.self, "gl_in"); + if (model != ExecutionModelMeshEXT) + { + // Make sure the array has a supported name in the code. + if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + else + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + set_name(var.self, "gl_MeshPrimitivesEXT"); + else + set_name(var.self, "gl_MeshVerticesEXT"); + } } }); } @@ -3129,6 +3172,11 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; std::unordered_map builtin_xfb_offsets; + const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { + return builtin == BuiltInPosition || builtin == BuiltInPointSize || + builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; + }; + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { auto &type = this->get(var.basetype); bool block = has_decoration(type.self, DecorationBlock); @@ -3139,7 +3187,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo uint32_t index = 0; for (auto &m : ir.meta[type.self].members) { - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) @@ -3192,7 +3240,7 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo { // While we're at it, collect all declared global builtins (HLSL mostly ...). auto &m = ir.meta[var.self].decoration; - if (m.builtin) + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) { global_builtins.set(m.builtin_type); if (m.builtin_type == BuiltInCullDistance) @@ -3281,7 +3329,9 @@ void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionMo attr.push_back(join("stream = ", geom_stream)); } - if (!attr.empty()) + if (model == ExecutionModelMeshEXT) + statement("out gl_MeshPerVertexEXT"); + else if (!attr.empty()) statement("layout(", merge(attr), ") out gl_PerVertex"); else statement("out gl_PerVertex"); @@ -3399,7 +3449,8 @@ void CompilerGLSL::emit_resources() case ExecutionModelGeometry: case ExecutionModelTessellationControl: case ExecutionModelTessellationEvaluation: - fixup_implicit_builtin_block_names(); + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); break; default: @@ -3419,6 +3470,7 @@ void CompilerGLSL::emit_resources() break; case ExecutionModelVertex: + case ExecutionModelMeshEXT: emit_declared_builtin_block(StorageClassOutput, execution.model); break; @@ -8890,6 +8942,15 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); return "gl_FragFullyCoveredNV"; + case BuiltInPrimitiveTriangleIndicesEXT: + return "gl_PrimitiveTriangleIndicesEXT"; + case BuiltInPrimitiveLineIndicesEXT: + return "gl_PrimitiveLineIndicesEXT"; + case BuiltInPrimitivePointIndicesEXT: + return "gl_PrimitivePointIndicesEXT"; + case BuiltInCullPrimitiveEXT: + return "gl_CullPrimitiveEXT"; + default: return join("gl_BuiltIn_", convert_to_string(builtin)); } @@ -9078,14 +9139,19 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice // but HLSL seems to just emit straight arrays here. // We must pretend this access goes through gl_in/gl_out arrays // to be able to access certain builtins as arrays. + // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT. auto builtin = ir.meta[base].decoration.builtin_type; + bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; + switch (builtin) { // case BuiltInCullDistance: // These are already arrays, need to figure out rules for these in tess/geom. // case BuiltInClipDistance: case BuiltInPosition: case BuiltInPointSize: - if (var->storage == StorageClassInput) + if (mesh_shader) + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassInput) expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); else if (var->storage == StorageClassOutput) expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); @@ -9093,6 +9159,17 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice append_index(index, is_literal); break; + case BuiltInPrimitiveId: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + if (mesh_shader) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + default: append_index(index, is_literal); break; @@ -13510,6 +13587,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) } break; + case OpSetMeshOutputsEXT: + statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + default: statement("// unimplemented op ", instruction.op); break; @@ -13818,28 +13899,41 @@ string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); } -void CompilerGLSL::fixup_io_block_patch_qualifiers(const SPIRVariable &var) +void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) { // Works around weird behavior in glslangValidator where // a patch out block is translated to just block members getting the decoration. // To make glslang not complain when we compile again, we have to transform this back to a case where // the variable itself has Patch decoration, and not members. + // Same for perprimitiveEXT. auto &type = get(var.basetype); if (has_decoration(type.self, DecorationBlock)) { uint32_t member_count = uint32_t(type.member_types.size()); + Decoration promoted_decoration = {}; + bool do_promote_decoration = false; for (uint32_t i = 0; i < member_count; i++) { if (has_member_decoration(type.self, i, DecorationPatch)) { - set_decoration(var.self, DecorationPatch); + promoted_decoration = DecorationPatch; + do_promote_decoration = true; + break; + } + else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + { + promoted_decoration = DecorationPerPrimitiveEXT; + do_promote_decoration = true; break; } } - if (has_decoration(var.self, DecorationPatch)) + if (do_promote_decoration) + { + set_decoration(var.self, promoted_decoration); for (uint32_t i = 0; i < member_count; i++) - unset_member_decoration(type.self, i, DecorationPatch); + unset_member_decoration(type.self, i, promoted_decoration); + } } } @@ -13852,6 +13946,8 @@ string CompilerGLSL::to_qualifiers_glsl(uint32_t id) if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) res += "shared "; + else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT) + res += "taskPayloadSharedEXT "; res += to_interpolation_qualifiers(flags); if (var) @@ -16337,6 +16433,9 @@ void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr case BuiltInIncomingRayFlagsNV: case BuiltInLaunchIdNV: case BuiltInLaunchSizeNV: + case BuiltInPrimitiveTriangleIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitivePointIndicesEXT: expected_type = SPIRType::UInt; break; diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index d967b456b..2d1dad6cf 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -628,7 +628,7 @@ class CompilerGLSL : public Compiler void emit_buffer_reference_block(uint32_t type_id, bool forward_declaration); void emit_buffer_block_legacy(const SPIRVariable &var); void emit_buffer_block_flattened(const SPIRVariable &type); - void fixup_implicit_builtin_block_names(); + void fixup_implicit_builtin_block_names(spv::ExecutionModel model); void emit_declared_builtin_block(spv::StorageClass storage, spv::ExecutionModel model); bool should_force_emit_builtin_block(spv::StorageClass storage); void emit_push_constant_block_vulkan(const SPIRVariable &var); @@ -772,7 +772,7 @@ class CompilerGLSL : public Compiler std::string type_to_glsl_constructor(const SPIRType &type); std::string argument_decl(const SPIRFunction::Parameter &arg); virtual std::string to_qualifiers_glsl(uint32_t id); - void fixup_io_block_patch_qualifiers(const SPIRVariable &var); + void fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var); void emit_output_variable_initializer(const SPIRVariable &var); std::string to_precision_qualifiers_glsl(uint32_t id); virtual const char *to_storage_qualifiers_glsl(const SPIRVariable &var); diff --git a/spirv_parser.cpp b/spirv_parser.cpp index c290a5ebb..526c92cb5 100644 --- a/spirv_parser.cpp +++ b/spirv_parser.cpp @@ -349,6 +349,10 @@ void Parser::parse(const Instruction &instruction) execution.output_vertices = ops[2]; break; + case ExecutionModeOutputPrimitivesEXT: + execution.output_primitives = ops[2]; + break; + default: break; }