diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 3ab4aa107d..427c57926c 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -2490,6 +2490,9 @@ Features support for buffer device address extension. + + Support for 32-bit image atomic operations. + Maximum number of uniform sets that can be bound at a given time. diff --git a/drivers/apple/foundation_helpers.h b/drivers/apple/foundation_helpers.h new file mode 100644 index 0000000000..db87fba96c --- /dev/null +++ b/drivers/apple/foundation_helpers.h @@ -0,0 +1,56 @@ +/**************************************************************************/ +/* foundation_helpers.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#import + +class String; +template +class CharStringT; + +using CharString = CharStringT; + +namespace conv { + +/** + * Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer. + * */ +NSString *to_nsstring(const String &p_str); +/** + * Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer. + * */ +NSString *to_nsstring(const CharString &p_str); +/** + * Converts an NSString to a Godot String without allocating intermediate buffers. + * */ +String to_string(NSString *p_str); + +} //namespace conv diff --git a/drivers/apple/foundation_helpers.mm b/drivers/apple/foundation_helpers.mm new file mode 100644 index 0000000000..0453011b1d --- /dev/null +++ b/drivers/apple/foundation_helpers.mm @@ -0,0 +1,85 @@ +/**************************************************************************/ +/* foundation_helpers.mm */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). 
*/ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#import "foundation_helpers.h" + +#import "core/string/ustring.h" + +#import + +namespace conv { + +NSString *to_nsstring(const String &p_str) { + return [[NSString alloc] initWithBytes:(const void *)p_str.ptr() + length:p_str.length() * sizeof(char32_t) + encoding:NSUTF32LittleEndianStringEncoding]; +} + +NSString *to_nsstring(const CharString &p_str) { + return [[NSString alloc] initWithBytes:(const void *)p_str.ptr() + length:p_str.length() + encoding:NSUTF8StringEncoding]; +} + +String to_string(NSString *p_str) { + CFStringRef str = (__bridge CFStringRef)p_str; + CFStringEncoding fastest = CFStringGetFastestEncoding(str); + // Sometimes, CFString will return a pointer to its encoded data, + // so we can create the string without allocating intermediate buffers. + const char *p = CFStringGetCStringPtr(str, fastest); + if (p) { + switch (fastest) { + case kCFStringEncodingASCII: + return String::ascii(Span(p, CFStringGetLength(str))); + case kCFStringEncodingUTF8: + return String::utf8(p); + case kCFStringEncodingUTF32LE: + return String::utf32(Span((char32_t *)p, CFStringGetLength(str))); + default: + break; + } + } + + CFRange range = CFRangeMake(0, CFStringGetLength(str)); + CFIndex byte_len = 0; + // Try to losslessly convert the string directly into a String's buffer to avoid intermediate allocations.
+ CFIndex n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, nil, 0, &byte_len); + if (n == range.length) { + String res; + res.resize_uninitialized((byte_len / sizeof(char32_t)) + 1); + res[byte_len / sizeof(char32_t)] = 0; + n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, (UInt8 *)res.ptrw(), res.length() * sizeof(char32_t), nil); + return res; + } + + return String::utf8(p_str.UTF8String); +} + +} //namespace conv diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index a91eee3050..f454291a1b 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -5586,6 +5586,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) { return true; case SUPPORTS_BUFFER_DEVICE_ADDRESS: return true; + case SUPPORTS_IMAGE_ATOMIC_32_BIT: + return true; default: return false; } diff --git a/drivers/metal/SCsub b/drivers/metal/SCsub index a4c1c65b82..f55933a2c3 100644 --- a/drivers/metal/SCsub +++ b/drivers/metal/SCsub @@ -12,7 +12,6 @@ thirdparty_obj = [] thirdparty_dir = "#thirdparty/spirv-cross/" thirdparty_sources = [ "spirv_cfg.cpp", - "spirv_cross_util.cpp", "spirv_cross.cpp", "spirv_parser.cpp", "spirv_msl.cpp", diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index 720efd64e1..24a1a4cdf9 100644 --- a/drivers/metal/metal_device_properties.h +++ b/drivers/metal/metal_device_properties.h @@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */ bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */ bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */ + bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */ + bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported.
*/ }; struct MetalLimits { diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm index 43946ede6e..4b06e24ad3 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.mm @@ -121,6 +121,12 @@ void MetalDeviceProperties::init_features(id p_device) { features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6]; features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7]; features.argument_buffers_tier = p_device.argumentBuffersSupport; + features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6]; + features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8]; + if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") { + features.supports_image_atomic_32_bit = false; + features.supports_image_atomic_64_bit = false; + } if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) { features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2); diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index b89d4ba2e0..4826ded95d 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -309,9 +309,23 @@ public: class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer { private: +#pragma mark - Common State + + // From RenderingDevice + static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128; + RenderingDeviceDriverMetal *device_driver = nullptr; id queue = nil; id commandBuffer = nil; + bool state_begin = false; + + _FORCE_INLINE_ id command_buffer() { + DEV_ASSERT(state_begin); + if (commandBuffer == nil) { + commandBuffer = queue.commandBuffer; + } + return commandBuffer; + } void _end_compute_dispatch(); void _end_blit(); @@ -326,6 +340,11 @@ private: void _end_render_pass(); void _render_clear_render_area(); +#pragma mark - Compute + + void _compute_set_dirty_state(); + void _compute_bind_uniform_sets(); + public: MDCommandBufferStateType type = MDCommandBufferStateType::None; @@ -349,18 +368,18 @@ public: LocalVector vertex_offsets; ResourceUsageMap resource_usage; // clang-format off - enum DirtyFlag: uint8_t { - DIRTY_NONE = 0b0000'0000, - DIRTY_PIPELINE = 0b0000'0001, //! pipeline state - DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets - DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state - DIRTY_VERTEX = 0b0000'1000, //! vertex buffers - DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles - DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles - DIRTY_BLEND = 0b0100'0000, //! blend state - DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode - - DIRTY_ALL = 0xff, + enum DirtyFlag: uint16_t { + DIRTY_NONE = 0, + DIRTY_PIPELINE = 1 << 0, //! pipeline state + DIRTY_UNIFORMS = 1 << 1, //! uniform sets + DIRTY_PUSH = 1 << 2, //! push constants + DIRTY_DEPTH = 1 << 3, //! depth / stencil state + DIRTY_VERTEX = 1 << 4, //! vertex buffers + DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles + DIRTY_SCISSOR = 1 << 6, //! scissor rectangles + DIRTY_BLEND = 1 << 7, //! blend state + DIRTY_RASTER = 1 << 8, //! encoder state like cull mode + DIRTY_ALL = (1 << 9) - 1, }; // clang-format on BitField dirty = DIRTY_NONE; @@ -368,6 +387,9 @@ public: LocalVector uniform_sets; // Bit mask of the uniform sets that are dirty, to prevent redundant binding. 
uint64_t uniform_set_mask = 0; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; + uint32_t push_constant_data_len = 0; + uint32_t push_constant_bindings[2] = { 0 }; _FORCE_INLINE_ void reset(); void end_encoding(); @@ -422,6 +444,13 @@ public: dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); } + _FORCE_INLINE_ void mark_push_constants_dirty() { + if (push_constant_data_len == 0) { + return; + } + dirty.set_flag(DirtyFlag::DIRTY_PUSH); + } + _FORCE_INLINE_ void mark_blend_dirty() { if (!blend_constants.has_value()) { return; @@ -464,16 +493,46 @@ public: MDComputePipeline *pipeline = nullptr; id encoder = nil; ResourceUsageMap resource_usage; - _FORCE_INLINE_ void reset() { - pipeline = nil; - encoder = nil; - // Keep the keys, as they are likely to be used again. - for (KeyValue>> &kv : resource_usage) { - kv.value.clear(); + // clang-format off + enum DirtyFlag: uint16_t { + DIRTY_NONE = 0, + DIRTY_PIPELINE = 1 << 0, //! pipeline state + DIRTY_UNIFORMS = 1 << 1, //! uniform sets + DIRTY_PUSH = 1 << 2, //! push constants + DIRTY_ALL = (1 << 3) - 1, + }; + // clang-format on + BitField dirty = DIRTY_NONE; + + LocalVector uniform_sets; + // Bit mask of the uniform sets that are dirty, to prevent redundant binding. + uint64_t uniform_set_mask = 0; + uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE]; + uint32_t push_constant_data_len = 0; + uint32_t push_constant_bindings[1] = { 0 }; + + _FORCE_INLINE_ void reset(); + void end_encoding(); + + _FORCE_INLINE_ void mark_uniforms_dirty(void) { + if (uniform_sets.is_empty()) { + return; } + for (uint32_t i = 0; i < uniform_sets.size(); i++) { + if (uniform_sets[i] != nullptr) { + uniform_set_mask |= 1 << i; + } + } + dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS); + } + + _FORCE_INLINE_ void mark_push_constants_dirty() { + if (push_constant_data_len == 0) { + return; + } + dirty.set_flag(DirtyFlag::DIRTY_PUSH); } - void end_encoding(); } compute; // State specific to a blit pass. 
@@ -496,6 +555,7 @@ public: void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label); void bind_pipeline(RDD::PipelineID p_pipeline); + void encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data); #pragma mark - Render Commands @@ -661,8 +721,6 @@ public: Vector sets; bool uses_argument_buffers = true; - virtual void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) = 0; - MDShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers) : name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {} virtual ~MDShader() = default; @@ -671,15 +729,13 @@ public: class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader { public: struct { - uint32_t binding = -1; + int32_t binding = -1; uint32_t size = 0; } push_constants; MTLSize local = {}; MDLibrary *kernel; - void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; - MDComputeShader(CharString p_name, Vector p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel); }; @@ -700,8 +756,6 @@ public: MDLibrary *vert; MDLibrary *frag; - void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; - MDRenderShader(CharString p_name, Vector p_sets, bool p_needs_view_mask_buffer, diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index f8056e217b..96ade55c1b 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -62,8 +62,8 @@ #undef MAX void MDCommandBuffer::begin() { - DEV_ASSERT(commandBuffer == nil); - commandBuffer = queue.commandBuffer; + DEV_ASSERT(commandBuffer == nil && !state_begin); + state_begin = true; } void MDCommandBuffer::end() { @@ -83,6 +83,7 @@ void MDCommandBuffer::commit() { end(); [commandBuffer commit]; commandBuffer = nil; + state_begin = false; } void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) { @@ -136,7 +137,7 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) { render.desc.colorAttachments[0].resolveTexture = res_tex; } #endif - render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc]; + render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc]; } if (render.pipeline != rp) { @@ -160,9 +161,44 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) { DEV_ASSERT(type == MDCommandBufferStateType::None); type = MDCommandBufferStateType::Compute; - compute.pipeline = (MDComputePipeline *)p; - compute.encoder = commandBuffer.computeCommandEncoder; - [compute.encoder setComputePipelineState:compute.pipeline->state]; + if (compute.pipeline != p) { + compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE); + compute.mark_uniforms_dirty(); + compute.pipeline = (MDComputePipeline *)p; + } + } +} + +void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView p_data) { + switch (type) { + case MDCommandBufferStateType::Render: { + MDRenderShader *shader = (MDRenderShader *)(p_shader.id); + if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) { + return; + } + render.push_constant_bindings[0] = shader->push_constants.vert.binding; + render.push_constant_bindings[1] = shader->push_constants.frag.binding; + void const *ptr = p_data.ptr(); + render.push_constant_data_len = p_data.size() * sizeof(uint32_t); + DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data)); + memcpy(render.push_constant_data, ptr, 
render.push_constant_data_len); + render.mark_push_constants_dirty(); + } break; + case MDCommandBufferStateType::Compute: { + MDComputeShader *shader = (MDComputeShader *)(p_shader.id); + if (shader->push_constants.binding == -1) { + return; + } + compute.push_constant_bindings[0] = shader->push_constants.binding; + void const *ptr = p_data.ptr(); + compute.push_constant_data_len = p_data.size() * sizeof(uint32_t); + DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data)); + memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len); + compute.mark_push_constants_dirty(); + } break; + case MDCommandBufferStateType::Blit: + case MDCommandBufferStateType::None: + return; } } @@ -181,7 +217,7 @@ id MDCommandBuffer::blit_command_encoder() { } type = MDCommandBufferStateType::Blit; - blit.encoder = commandBuffer.blitCommandEncoder; + blit.encoder = command_buffer().blitCommandEncoder; return blit.encoder; } @@ -200,7 +236,7 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc break; } - id enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc]; + id enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc]; if (p_label != nil) { [enc pushDebugGroup:p_label]; [enc popDebugGroup]; @@ -344,6 +380,19 @@ void MDCommandBuffer::render_clear_attachments(VectorView void MDCommandBuffer::_render_set_dirty_state() { _render_bind_uniform_sets(); + if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) { + if (render.push_constant_bindings[0] != (uint32_t)-1) { + [render.encoder setVertexBytes:render.push_constant_data + length:render.push_constant_data_len + atIndex:render.push_constant_bindings[0]]; + } + if (render.push_constant_bindings[1] != (uint32_t)-1) { + [render.encoder setFragmentBytes:render.push_constant_data + length:render.push_constant_data_len + atIndex:render.push_constant_bindings[1]]; + } + } + MDSubpass const &subpass = render.get_subpass(); if (subpass.view_count > 1) { uint32_t view_range[2] = { 0, subpass.view_count }; @@ -552,7 +601,7 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t } void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView p_clear_values) { - DEV_ASSERT(commandBuffer != nil); + DEV_ASSERT(command_buffer() != nil); end(); MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id); @@ -639,7 +688,7 @@ void MDCommandBuffer::_render_clear_render_area() { } void MDCommandBuffer::render_next_subpass() { - DEV_ASSERT(commandBuffer != nil); + DEV_ASSERT(command_buffer() != nil); if (render.current_subpass == UINT32_MAX) { render.current_subpass = 0; @@ -726,7 +775,7 @@ void MDCommandBuffer::render_next_subpass() { // the defaultRasterSampleCount from the pipeline's sample count. 
render.desc = desc; } else { - render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc]; + render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc]; if (!render.is_rendering_entire_area) { _render_clear_render_area(); @@ -895,6 +944,7 @@ void MDCommandBuffer::RenderState::reset() { dirty = DIRTY_NONE; uniform_sets.clear(); uniform_set_mask = 0; + push_constant_data_len = 0; clear_values.clear(); viewports.clear(); scissors.clear(); @@ -960,29 +1010,108 @@ void MDCommandBuffer::ComputeState::end_encoding() { #pragma mark - Compute +void MDCommandBuffer::_compute_set_dirty_state() { + if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) { + compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent]; + [compute.encoder setComputePipelineState:compute.pipeline->state]; + } + + _compute_bind_uniform_sets(); + + if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) { + if (compute.push_constant_bindings[0] != (uint32_t)-1) { + [compute.encoder setBytes:compute.push_constant_data + length:compute.push_constant_data_len + atIndex:compute.push_constant_bindings[0]]; + } + } + + compute.dirty.clear(); +} + +void MDCommandBuffer::_compute_bind_uniform_sets() { + DEV_ASSERT(type == MDCommandBufferStateType::Compute); + if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) { + return; + } + + compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS); + uint64_t set_uniforms = compute.uniform_set_mask; + compute.uniform_set_mask = 0; + + MDComputeShader *shader = compute.pipeline->shader; + + while (set_uniforms != 0) { + // Find the index of the next set bit. + uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms); + // Clear the set bit. + set_uniforms &= (set_uniforms - 1); + MDUniformSet *set = compute.uniform_sets[index]; + if (set == nullptr || index >= (uint32_t)shader->sets.size()) { + continue; + } + set->bind_uniforms(shader, compute, index); + } +} + +void MDCommandBuffer::ComputeState::reset() { + pipeline = nil; + encoder = nil; + dirty = DIRTY_NONE; + uniform_sets.clear(); + uniform_set_mask = 0; + push_constant_data_len = 0; + // Keep the keys, as they are likely to be used again. + for (KeyValue>> &kv : resource_usage) { + kv.value.clear(); + } +} + void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) { DEV_ASSERT(type == MDCommandBufferStateType::Compute); - MDShader *shader = (MDShader *)(p_shader.id); MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id); - set->bind_uniforms(shader, compute, p_set_index); + if (compute.uniform_sets.size() <= p_set_index) { + uint32_t s = compute.uniform_sets.size(); + compute.uniform_sets.resize(p_set_index + 1); + // Set intermediate values to null.
+ std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr); + } + + if (compute.uniform_sets[p_set_index] != set) { + compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS); + compute.uniform_set_mask |= 1ULL << p_set_index; + compute.uniform_sets[p_set_index] = set; + } } void MDCommandBuffer::compute_bind_uniform_sets(VectorView p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) { DEV_ASSERT(type == MDCommandBufferStateType::Compute); - MDShader *shader = (MDShader *)(p_shader.id); - - // TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:] - for (size_t i = 0u; i < p_set_count; ++i) { + for (size_t i = 0; i < p_set_count; ++i) { MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id); - set->bind_uniforms(shader, compute, p_first_set_index + i); + + uint32_t index = p_first_set_index + i; + if (compute.uniform_sets.size() <= index) { + uint32_t s = compute.uniform_sets.size(); + compute.uniform_sets.resize(index + 1); + // Set intermediate values to null. + std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr); + } + + if (compute.uniform_sets[index] != set) { + compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS); + compute.uniform_set_mask |= 1ULL << index; + compute.uniform_sets[index] = set; + } } } void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) { DEV_ASSERT(type == MDCommandBufferStateType::Compute); + _compute_set_dirty_state(); + MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups); id enc = compute.encoder; @@ -992,6 +1121,8 @@ void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) { DEV_ASSERT(type == MDCommandBufferStateType::Compute); + _compute_set_dirty_state(); + id indirectBuffer = rid::get(p_indirect_buffer); id enc = compute.encoder; @@ -1021,20 +1152,6 @@ MDComputeShader::MDComputeShader(CharString p_name, MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) { } -void MDComputeShader::encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) { - DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute); - if (push_constants.binding == (uint32_t)-1) { - return; - } - - id enc = p_cb->compute.encoder; - - void const *ptr = p_data.ptr(); - size_t length = p_data.size() * sizeof(uint32_t); - - [enc setBytes:ptr length:length atIndex:push_constants.binding]; -} - MDRenderShader::MDRenderShader(CharString p_name, Vector p_sets, bool p_needs_view_mask_buffer, @@ -1046,22 +1163,6 @@ MDRenderShader::MDRenderShader(CharString p_name, frag(p_frag) { } -void MDRenderShader::encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) { - DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render); - id __unsafe_unretained enc = p_cb->render.encoder; - - void const *ptr = p_data.ptr(); - size_t length = p_data.size() * sizeof(uint32_t); - - if (push_constants.vert.binding > -1) { - [enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding]; - } - - if (push_constants.frag.binding > -1) { - [enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding]; - } -} - void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) { DEV_ASSERT(p_shader->uses_argument_buffers); DEV_ASSERT(p_state.encoder != nil); diff --git 
a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index ed450b8bf8..e099e28a39 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -58,6 +58,7 @@ #include "core/io/marshalls.h" #include "core/string/ustring.h" #include "core/templates/hash_map.h" +#include "drivers/apple/foundation_helpers.h" #import #import @@ -317,12 +318,6 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p desc.usage |= MTLTextureUsageShaderWrite; } - if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) { - if (format_caps & kMTLFmtCapsAtomic) { - desc.usage |= MTLTextureUsageShaderAtomic; - } - } - bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt)); if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) && @@ -334,6 +329,18 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p desc.usage |= MTLTextureUsageShaderRead; } + if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) { + if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) { + if (format_caps & kMTLFmtCapsAtomic) { + desc.usage |= MTLTextureUsageShaderAtomic; + } else { + ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic operations on this texture format are not supported."); + } + } else { + ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic texture operations not supported on this OS version."); + } + } + if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) { ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT"); } @@ -363,7 +370,29 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p // Check if it is a linear format for atomic operations and therefore needs a buffer, // as generally Metal does not support atomic operations on textures. - bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT || p_format.format == DATA_FORMAT_R32G32_UINT || p_format.format == DATA_FORMAT_R32G32_SINT)); + bool needs_buffer = is_linear; + + // Check for atomic requirements. + if (flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D) { + switch (p_format.format) { + case RenderingDeviceCommons::DATA_FORMAT_R32_SINT: + case RenderingDeviceCommons::DATA_FORMAT_R32_UINT: { + if (!device_properties->features.supports_image_atomic_32_bit) { + // We can emulate 32-bit atomic operations on textures. + needs_buffer = true; + } + } break; + case RenderingDeviceCommons::DATA_FORMAT_R32G32_SINT: + case RenderingDeviceCommons::DATA_FORMAT_R32G32_UINT: { + if (!device_properties->features.supports_image_atomic_64_bit) { + // No emulation for 64-bit atomics. 
+ ERR_FAIL_V_MSG(TextureID(), "64-bit atomic operations are not supported."); + } + } break; + default: + break; + } + } id obj = nil; if (needs_buffer) { @@ -900,9 +929,15 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id); Fence *fence = (Fence *)(p_cmd_fence.id); if (fence != nullptr) { - [cmd_buffer->get_command_buffer() addCompletedHandler:^(id buffer) { + id cb = cmd_buffer->get_command_buffer(); + if (cb == nil) { + // If there is nothing to do, signal the fence immediately. dispatch_semaphore_signal(fence->semaphore); - }]; + } else { + [cb addCompletedHandler:^(id buffer) { + dispatch_semaphore_signal(fence->semaphore); + }]; + } } for (uint32_t i = 0; i < p_swap_chains.size(); i++) { @@ -1730,8 +1765,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) { void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView p_data) { MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); - MDShader *shader = (MDShader *)(p_shader.id); - shader->encode_push_constant_data(p_data, cb); + cb->encode_push_constant_data(p_shader, p_data); } // ----- CACHE ----- @@ -2417,6 +2451,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new]; desc.computeFunction = function; + desc.label = conv::to_nsstring(shader->name); if (archive) { desc.binaryArchives = @[ archive ]; } @@ -2735,6 +2770,8 @@ bool RenderingDeviceDriverMetal::has_feature(Features p_feature) { return device_properties->features.metal_fx_spatial; case SUPPORTS_METALFX_TEMPORAL: return device_properties->features.metal_fx_temporal; + case SUPPORTS_IMAGE_ATOMIC_32_BIT: + return device_properties->features.supports_image_atomic_32_bit; default: return false; } diff --git a/drivers/metal/rendering_shader_container_metal.mm b/drivers/metal/rendering_shader_container_metal.mm index c2e4518a06..d9c81dba6f 100644 --- a/drivers/metal/rendering_shader_container_metal.mm +++ b/drivers/metal/rendering_shader_container_metal.mm @@ -199,6 +199,8 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vectorfeatures.mslVersionMajor, device_profile->features.mslVersionMinor); msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor); mtl_reflection_data.msl_version = msl_options.msl_version; msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS; @@ -209,7 +211,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vectorget_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") { + if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") { disable_argument_buffers = true; } @@ -236,6 +238,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector= CompilerMSL::Options::make_msl_version(3, 2)) { + // All 3.2+ versions support device coherence, so we can disable texture fences. + msl_options.readwrite_texture_fences = false; + } CompilerGLSL::Options options{}; options.vertex.flip_vert_y = true; @@ -417,6 +423,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vectorhas_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT) ? 
RD::UNIFORM_TYPE_IMAGE : RD::UNIFORM_TYPE_STORAGE_BUFFER; RD::TextureFormat tf; tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT; @@ -440,29 +441,29 @@ void Fog::VolumetricFog::init(const Vector3i &fog_size, RID p_sky_shader) { fog_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); RD::get_singleton()->set_resource_name(fog_map, "Fog map"); -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - Vector dm; - dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4); + if (atomic_type == RD::UNIFORM_TYPE_STORAGE_BUFFER) { + Vector dm; + dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4); - density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm); - RD::get_singleton()->set_resource_name(density_map, "Fog density map"); - light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm); - RD::get_singleton()->set_resource_name(light_map, "Fog light map"); - emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm); - RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map"); -#else - tf.format = RD::DATA_FORMAT_R32_UINT; - tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; - density_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(density_map, "Fog density map"); - RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1); - light_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(light_map, "Fog light map"); - RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1); - emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); - RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map"); - RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1); -#endif + density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm); + RD::get_singleton()->set_resource_name(density_map, "Fog density map"); + light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm); + RD::get_singleton()->set_resource_name(light_map, "Fog light map"); + emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm); + RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map"); + } else { + tf.format = RD::DATA_FORMAT_R32_UINT; + tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT; + density_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(density_map, "Fog density map"); + RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1); + light_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(light_map, "Fog light map"); + RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1); + emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView()); + RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map"); + RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1); + } Vector uniforms; { @@ -579,11 +580,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P { RD::Uniform u; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; -#else - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; -#endif + u.uniform_type 
= fog->atomic_type; u.binding = 1; u.append_id(fog->emissive_map); uniforms.push_back(u); @@ -599,11 +596,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P { RD::Uniform u; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; -#else - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; -#endif + u.uniform_type = fog->atomic_type; u.binding = 3; u.append_id(fog->density_map); uniforms.push_back(u); @@ -611,11 +604,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P { RD::Uniform u; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; -#else - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; -#endif + u.uniform_type = fog->atomic_type; u.binding = 4; u.append_id(fog->light_map); uniforms.push_back(u); @@ -918,22 +907,14 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P } { RD::Uniform u; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; -#else - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; -#endif + u.uniform_type = fog->atomic_type; u.binding = 16; u.append_id(fog->density_map); uniforms.push_back(u); } { RD::Uniform u; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; -#else - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; -#endif + u.uniform_type = fog->atomic_type; u.binding = 17; u.append_id(fog->light_map); uniforms.push_back(u); @@ -941,11 +922,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P { RD::Uniform u; -#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED) - u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; -#else - u.uniform_type = RD::UNIFORM_TYPE_IMAGE; -#endif + u.uniform_type = fog->atomic_type; u.binding = 18; u.append_id(fog->emissive_map); uniforms.push_back(u); diff --git a/servers/rendering/renderer_rd/environment/fog.h b/servers/rendering/renderer_rd/environment/fog.h index 6e91b4bcf4..f593b3310f 100644 --- a/servers/rendering/renderer_rd/environment/fog.h +++ b/servers/rendering/renderer_rd/environment/fog.h @@ -316,6 +316,9 @@ public: int last_shadow_filter = -1; + // If the device doesn't support image atomics, use storage buffers instead. + RD::UniformType atomic_type = RD::UNIFORM_TYPE_IMAGE; + virtual void configure(RenderSceneBuffersRD *p_render_buffers) override {} virtual void free_data() override {} diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index 49ebbcdaf7..ed8ecdc4b1 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -234,11 +234,13 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c builder.append(String("#define ") + String(E.key) + "_CODE_USED\n"); } #if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)) - if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) { + RenderingDevice *rd = RD::get_singleton(); + if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) { builder.append("#define MOLTENVK_USED\n"); } - // Image atomics are supported on Metal 3.1 but no support in MoltenVK or SPIRV-Cross yet. 
- builder.append("#define NO_IMAGE_ATOMICS\n"); + if (!rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { + builder.append("#define NO_IMAGE_ATOMICS\n"); + } #endif builder.append(String("#define RENDER_DRIVER_") + OS::get_singleton()->get_current_rendering_driver_name().to_upper() + "\n"); diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl index 929f1e34df..4ca666019a 100644 --- a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl @@ -2,6 +2,8 @@ #version 450 +#pragma use_vulkan_memory_model + #VERSION_DEFINES layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl index 832058553e..17ee5ced28 100644 --- a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl @@ -2,6 +2,8 @@ #version 450 +#pragma use_vulkan_memory_model + #VERSION_DEFINES #ifdef MODE_DENSITY diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index ddf6f91d25..0030c8674d 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -7979,6 +7979,7 @@ void RenderingDevice::_bind_methods() { BIND_ENUM_CONSTANT(SUPPORTS_METALFX_SPATIAL); BIND_ENUM_CONSTANT(SUPPORTS_METALFX_TEMPORAL); BIND_ENUM_CONSTANT(SUPPORTS_BUFFER_DEVICE_ADDRESS); + BIND_ENUM_CONSTANT(SUPPORTS_IMAGE_ATOMIC_32_BIT); BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS); BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS); diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 2196622828..960233e3ab 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -952,6 +952,7 @@ public: // If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver. SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS, SUPPORTS_BUFFER_DEVICE_ADDRESS, + SUPPORTS_IMAGE_ATOMIC_32_BIT, }; enum SubgroupOperations { diff --git a/thirdparty/README.md b/thirdparty/README.md index a15f7304cf..8996fc8aa2 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -978,7 +978,7 @@ Its version and license is described in this file under `hidapi`. ## spirv-cross - Upstream: https://github.com/KhronosGroup/SPIRV-Cross -- Version: git (6173e24b31f09a0c3217103a130e74c4ddec14a6, 2024) +- Version: git (d7440cbc6c50332600fdf21c45e6a5df0b07e54c, 2025) - License: Apache 2.0 Files extracted from upstream source: diff --git a/thirdparty/spirv-cross/spirv.hpp b/thirdparty/spirv-cross/spirv.hpp index 5047b9b302..f7a7bf835e 100644 --- a/thirdparty/spirv-cross/spirv.hpp +++ b/thirdparty/spirv-cross/spirv.hpp @@ -1,26 +1,10 @@ -// Copyright (c) 2014-2024 The Khronos Group Inc. +// Copyright: 2014-2024 The Khronos Group Inc. 
+// License: MIT // -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and/or associated documentation files (the "Materials"), -// to deal in the Materials without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Materials, and to permit persons to whom the -// Materials are furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Materials. -// -// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS -// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND -// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ -// -// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS -// IN THE MATERIALS. +// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS +// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS +// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT +// https://www.khronos.org/registry/ // This header is automatically generated by the same tool that creates // the Binary Section of the SPIR-V specification. @@ -69,6 +53,12 @@ enum SourceLanguage { SourceLanguageHLSL = 5, SourceLanguageCPP_for_OpenCL = 6, SourceLanguageSYCL = 7, + SourceLanguageHERO_C = 8, + SourceLanguageNZSL = 9, + SourceLanguageWGSL = 10, + SourceLanguageSlang = 11, + SourceLanguageZig = 12, + SourceLanguageRust = 13, SourceLanguageMax = 0x7fffffff, }; @@ -156,6 +146,9 @@ enum ExecutionMode { ExecutionModeSubgroupsPerWorkgroupId = 37, ExecutionModeLocalSizeId = 38, ExecutionModeLocalSizeHintId = 39, + ExecutionModeNonCoherentColorAttachmentReadEXT = 4169, + ExecutionModeNonCoherentDepthAttachmentReadEXT = 4170, + ExecutionModeNonCoherentStencilAttachmentReadEXT = 4171, ExecutionModeSubgroupUniformControlFlowKHR = 4421, ExecutionModePostDepthCoverage = 4446, ExecutionModeDenormPreserve = 4459, @@ -163,19 +156,32 @@ enum ExecutionMode { ExecutionModeSignedZeroInfNanPreserve = 4461, ExecutionModeRoundingModeRTE = 4462, ExecutionModeRoundingModeRTZ = 4463, + ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489, + ExecutionModeTileShadingRateQCOM = 4490, ExecutionModeEarlyAndLateFragmentTestsAMD = 5017, ExecutionModeStencilRefReplacingEXT = 5027, + ExecutionModeCoalescingAMDX = 5069, + ExecutionModeIsApiEntryAMDX = 5070, + ExecutionModeMaxNodeRecursionAMDX = 5071, + ExecutionModeStaticNumWorkgroupsAMDX = 5072, + ExecutionModeShaderIndexAMDX = 5073, + ExecutionModeMaxNumWorkgroupsAMDX = 5077, ExecutionModeStencilRefUnchangedFrontAMD = 5079, ExecutionModeStencilRefGreaterFrontAMD = 5080, ExecutionModeStencilRefLessFrontAMD = 5081, ExecutionModeStencilRefUnchangedBackAMD = 5082, ExecutionModeStencilRefGreaterBackAMD = 5083, ExecutionModeStencilRefLessBackAMD = 5084, + ExecutionModeQuadDerivativesKHR = 5088, + ExecutionModeRequireFullQuadsKHR = 5089, + ExecutionModeSharesInputWithAMDX = 5102, ExecutionModeOutputLinesEXT = 5269, 
ExecutionModeOutputLinesNV = 5269, ExecutionModeOutputPrimitivesEXT = 5270, ExecutionModeOutputPrimitivesNV = 5270, + ExecutionModeDerivativeGroupQuadsKHR = 5289, ExecutionModeDerivativeGroupQuadsNV = 5289, + ExecutionModeDerivativeGroupLinearKHR = 5290, ExecutionModeDerivativeGroupLinearNV = 5290, ExecutionModeOutputTrianglesEXT = 5298, ExecutionModeOutputTrianglesNV = 5298, @@ -195,7 +201,14 @@ enum ExecutionMode { ExecutionModeNoGlobalOffsetINTEL = 5895, ExecutionModeNumSIMDWorkitemsINTEL = 5896, ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903, + ExecutionModeMaximallyReconvergesKHR = 6023, + ExecutionModeFPFastMathDefault = 6028, + ExecutionModeStreamingInterfaceINTEL = 6154, + ExecutionModeRegisterMapInterfaceINTEL = 6160, ExecutionModeNamedBarrierCountINTEL = 6417, + ExecutionModeMaximumRegistersINTEL = 6461, + ExecutionModeMaximumRegistersIdINTEL = 6462, + ExecutionModeNamedMaximumRegistersINTEL = 6463, ExecutionModeMax = 0x7fffffff, }; @@ -213,6 +226,9 @@ enum StorageClass { StorageClassAtomicCounter = 10, StorageClassImage = 11, StorageClassStorageBuffer = 12, + StorageClassTileImageEXT = 4172, + StorageClassTileAttachmentQCOM = 4491, + StorageClassNodePayloadAMDX = 5068, StorageClassCallableDataKHR = 5328, StorageClassCallableDataNV = 5328, StorageClassIncomingCallableDataKHR = 5329, @@ -227,6 +243,7 @@ enum StorageClass { StorageClassShaderRecordBufferNV = 5343, StorageClassPhysicalStorageBuffer = 5349, StorageClassPhysicalStorageBufferEXT = 5349, + StorageClassHitObjectAttributeNV = 5385, StorageClassTaskPayloadWorkgroupEXT = 5402, StorageClassCodeSectionINTEL = 5605, StorageClassDeviceOnlyINTEL = 5936, @@ -242,6 +259,7 @@ enum Dim { DimRect = 4, DimBuffer = 5, DimSubpassData = 6, + DimTileImageDataEXT = 4173, DimMax = 0x7fffffff, }; @@ -348,6 +366,15 @@ enum ImageChannelDataType { ImageChannelDataTypeFloat = 14, ImageChannelDataTypeUnormInt24 = 15, ImageChannelDataTypeUnormInt101010_2 = 16, + ImageChannelDataTypeUnormInt10X6EXT = 17, + ImageChannelDataTypeUnsignedIntRaw10EXT = 19, + ImageChannelDataTypeUnsignedIntRaw12EXT = 20, + ImageChannelDataTypeUnormInt2_101010EXT = 21, + ImageChannelDataTypeUnsignedInt10X6EXT = 22, + ImageChannelDataTypeUnsignedInt12X4EXT = 23, + ImageChannelDataTypeUnsignedInt14X2EXT = 24, + ImageChannelDataTypeUnormInt12X4EXT = 25, + ImageChannelDataTypeUnormInt14X2EXT = 26, ImageChannelDataTypeMax = 0x7fffffff, }; @@ -405,8 +432,11 @@ enum FPFastMathModeShift { FPFastMathModeNSZShift = 2, FPFastMathModeAllowRecipShift = 3, FPFastMathModeFastShift = 4, + FPFastMathModeAllowContractShift = 16, FPFastMathModeAllowContractFastINTELShift = 16, + FPFastMathModeAllowReassocShift = 17, FPFastMathModeAllowReassocINTELShift = 17, + FPFastMathModeAllowTransformShift = 18, FPFastMathModeMax = 0x7fffffff, }; @@ -417,8 +447,11 @@ enum FPFastMathModeMask { FPFastMathModeNSZMask = 0x00000004, FPFastMathModeAllowRecipMask = 0x00000008, FPFastMathModeFastMask = 0x00000010, + FPFastMathModeAllowContractMask = 0x00010000, FPFastMathModeAllowContractFastINTELMask = 0x00010000, + FPFastMathModeAllowReassocMask = 0x00020000, FPFastMathModeAllowReassocINTELMask = 0x00020000, + FPFastMathModeAllowTransformMask = 0x00040000, }; enum FPRoundingMode { @@ -452,6 +485,7 @@ enum FunctionParameterAttribute { FunctionParameterAttributeNoCapture = 5, FunctionParameterAttributeNoWrite = 6, FunctionParameterAttributeNoReadWrite = 7, + FunctionParameterAttributeRuntimeAlignedINTEL = 5940, FunctionParameterAttributeMax = 0x7fffffff, }; @@ -503,12 +537,21 @@ enum Decoration { 
DecorationMaxByteOffset = 45, DecorationAlignmentId = 46, DecorationMaxByteOffsetId = 47, + DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216, DecorationNoSignedWrap = 4469, DecorationNoUnsignedWrap = 4470, DecorationWeightTextureQCOM = 4487, DecorationBlockMatchTextureQCOM = 4488, DecorationBlockMatchSamplerQCOM = 4499, DecorationExplicitInterpAMD = 4999, + DecorationNodeSharesPayloadLimitsWithAMDX = 5019, + DecorationNodeMaxPayloadsAMDX = 5020, + DecorationTrackFinishWritingAMDX = 5078, + DecorationPayloadNodeNameAMDX = 5091, + DecorationPayloadNodeBaseIndexAMDX = 5098, + DecorationPayloadNodeSparseArrayAMDX = 5099, + DecorationPayloadNodeArraySizeAMDX = 5100, + DecorationPayloadDispatchIndirectAMDX = 5105, DecorationOverrideCoverageNV = 5248, DecorationPassthroughNV = 5250, DecorationViewportRelativeNV = 5252, @@ -525,6 +568,7 @@ enum Decoration { DecorationRestrictPointerEXT = 5355, DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, + DecorationHitObjectShaderRecordBufferNV = 5386, DecorationBindlessSamplerNV = 5398, DecorationBindlessImageNV = 5399, DecorationBoundSamplerNV = 5400, @@ -557,20 +601,45 @@ enum Decoration { DecorationMergeINTEL = 5834, DecorationBankBitsINTEL = 5835, DecorationForcePow2DepthINTEL = 5836, + DecorationStridesizeINTEL = 5883, + DecorationWordsizeINTEL = 5884, + DecorationTrueDualPortINTEL = 5885, DecorationBurstCoalesceINTEL = 5899, DecorationCacheSizeINTEL = 5900, DecorationDontStaticallyCoalesceINTEL = 5901, DecorationPrefetchINTEL = 5902, DecorationStallEnableINTEL = 5905, DecorationFuseLoopsInFunctionINTEL = 5907, + DecorationMathOpDSPModeINTEL = 5909, DecorationAliasScopeINTEL = 5914, DecorationNoAliasINTEL = 5915, + DecorationInitiationIntervalINTEL = 5917, + DecorationMaxConcurrencyINTEL = 5918, + DecorationPipelineEnableINTEL = 5919, DecorationBufferLocationINTEL = 5921, DecorationIOPipeStorageINTEL = 5944, DecorationFunctionFloatingPointModeINTEL = 6080, DecorationSingleElementVectorINTEL = 6085, DecorationVectorComputeCallableFunctionINTEL = 6087, DecorationMediaBlockIOINTEL = 6140, + DecorationStallFreeINTEL = 6151, + DecorationFPMaxErrorDecorationINTEL = 6170, + DecorationLatencyControlLabelINTEL = 6172, + DecorationLatencyControlConstraintINTEL = 6173, + DecorationConduitKernelArgumentINTEL = 6175, + DecorationRegisterMapKernelArgumentINTEL = 6176, + DecorationMMHostInterfaceAddressWidthINTEL = 6177, + DecorationMMHostInterfaceDataWidthINTEL = 6178, + DecorationMMHostInterfaceLatencyINTEL = 6179, + DecorationMMHostInterfaceReadWriteModeINTEL = 6180, + DecorationMMHostInterfaceMaxBurstINTEL = 6181, + DecorationMMHostInterfaceWaitRequestINTEL = 6182, + DecorationStableKernelArgumentINTEL = 6183, + DecorationHostAccessINTEL = 6188, + DecorationInitModeINTEL = 6190, + DecorationImplementInRegisterMapINTEL = 6191, + DecorationCacheControlLoadINTEL = 6442, + DecorationCacheControlStoreINTEL = 6443, DecorationMax = 0x7fffffff, }; @@ -616,6 +685,11 @@ enum BuiltIn { BuiltInSubgroupLocalInvocationId = 41, BuiltInVertexIndex = 42, BuiltInInstanceIndex = 43, + BuiltInCoreIDARM = 4160, + BuiltInCoreCountARM = 4161, + BuiltInCoreMaxIDARM = 4162, + BuiltInWarpIDARM = 4163, + BuiltInWarpMaxIDARM = 4164, BuiltInSubgroupEqMask = 4416, BuiltInSubgroupEqMaskKHR = 4416, BuiltInSubgroupGeMask = 4417, @@ -633,6 +707,9 @@ enum BuiltIn { BuiltInDeviceIndex = 4438, BuiltInViewIndex = 4440, BuiltInShadingRateKHR = 4444, + BuiltInTileOffsetQCOM = 4492, + BuiltInTileDimensionQCOM = 4493, + BuiltInTileApronSizeQCOM = 4494, 
BuiltInBaryCoordNoPerspAMD = 4992, BuiltInBaryCoordNoPerspCentroidAMD = 4993, BuiltInBaryCoordNoPerspSampleAMD = 4994, @@ -641,6 +718,8 @@ enum BuiltIn { BuiltInBaryCoordSmoothSampleAMD = 4997, BuiltInBaryCoordPullModelAMD = 4998, BuiltInFragStencilRefEXT = 5014, + BuiltInRemainingRecursionLevelsAMDX = 5021, + BuiltInShaderIndexAMDX = 5073, BuiltInViewportMaskNV = 5253, BuiltInSecondaryPositionNV = 5257, BuiltInSecondaryViewportMaskNV = 5258, @@ -693,13 +772,25 @@ enum BuiltIn { BuiltInHitKindKHR = 5333, BuiltInHitKindNV = 5333, BuiltInCurrentRayTimeNV = 5334, + BuiltInHitTriangleVertexPositionsKHR = 5335, + BuiltInHitMicroTriangleVertexPositionsNV = 5337, + BuiltInHitMicroTriangleVertexBarycentricsNV = 5344, BuiltInIncomingRayFlagsKHR = 5351, BuiltInIncomingRayFlagsNV = 5351, BuiltInRayGeometryIndexKHR = 5352, + BuiltInHitIsSphereNV = 5359, + BuiltInHitIsLSSNV = 5360, + BuiltInHitSpherePositionNV = 5361, BuiltInWarpsPerSMNV = 5374, BuiltInSMCountNV = 5375, BuiltInWarpIDNV = 5376, BuiltInSMIDNV = 5377, + BuiltInHitLSSPositionsNV = 5396, + BuiltInHitKindFrontFacingMicroTriangleNV = 5405, + BuiltInHitKindBackFacingMicroTriangleNV = 5406, + BuiltInHitSphereRadiusNV = 5420, + BuiltInHitLSSRadiiNV = 5421, + BuiltInClusterIDNV = 5436, BuiltInCullMaskKHR = 6021, BuiltInMax = 0x7fffffff, }; @@ -734,6 +825,8 @@ enum LoopControlShift { LoopControlMaxInterleavingINTELShift = 21, LoopControlSpeculatedIterationsINTELShift = 22, LoopControlNoFusionINTELShift = 23, + LoopControlLoopCountINTELShift = 24, + LoopControlMaxReinvocationDelayINTELShift = 25, LoopControlMax = 0x7fffffff, }; @@ -756,6 +849,8 @@ enum LoopControlMask { LoopControlMaxInterleavingINTELMask = 0x00200000, LoopControlSpeculatedIterationsINTELMask = 0x00400000, LoopControlNoFusionINTELMask = 0x00800000, + LoopControlLoopCountINTELMask = 0x01000000, + LoopControlMaxReinvocationDelayINTELMask = 0x02000000, }; enum FunctionControlShift { @@ -763,6 +858,7 @@ enum FunctionControlShift { FunctionControlDontInlineShift = 1, FunctionControlPureShift = 2, FunctionControlConstShift = 3, + FunctionControlOptNoneEXTShift = 16, FunctionControlOptNoneINTELShift = 16, FunctionControlMax = 0x7fffffff, }; @@ -773,6 +869,7 @@ enum FunctionControlMask { FunctionControlDontInlineMask = 0x00000002, FunctionControlPureMask = 0x00000004, FunctionControlConstMask = 0x00000008, + FunctionControlOptNoneEXTMask = 0x00010000, FunctionControlOptNoneINTELMask = 0x00010000, }; @@ -959,6 +1056,17 @@ enum Capability { CapabilityShaderLayer = 69, CapabilityShaderViewportIndex = 70, CapabilityUniformDecoration = 71, + CapabilityCoreBuiltinsARM = 4165, + CapabilityTileImageColorReadAccessEXT = 4166, + CapabilityTileImageDepthReadAccessEXT = 4167, + CapabilityTileImageStencilReadAccessEXT = 4168, + CapabilityTensorsARM = 4174, + CapabilityStorageTensorArrayDynamicIndexingARM = 4175, + CapabilityStorageTensorArrayNonUniformIndexingARM = 4176, + CapabilityGraphARM = 4191, + CapabilityCooperativeMatrixLayoutsARM = 4201, + CapabilityFloat8EXT = 4212, + CapabilityFloat8CooperativeMatrixEXT = 4213, CapabilityFragmentShadingRateKHR = 4422, CapabilitySubgroupBallotKHR = 4423, CapabilityDrawParameters = 4427, @@ -988,11 +1096,13 @@ enum Capability { CapabilityRoundingModeRTZ = 4468, CapabilityRayQueryProvisionalKHR = 4471, CapabilityRayQueryKHR = 4472, + CapabilityUntypedPointersKHR = 4473, CapabilityRayTraversalPrimitiveCullingKHR = 4478, CapabilityRayTracingKHR = 4479, CapabilityTextureSampleWeightedQCOM = 4484, CapabilityTextureBoxFilterQCOM = 4485, 
CapabilityTextureBlockMatchQCOM = 4486, + CapabilityTileShadingQCOM = 4495, CapabilityTextureBlockMatch2QCOM = 4498, CapabilityFloat16ImageAMD = 5008, CapabilityImageGatherBiasLodAMD = 5009, @@ -1001,6 +1111,13 @@ enum Capability { CapabilityImageReadWriteLodAMD = 5015, CapabilityInt64ImageEXT = 5016, CapabilityShaderClockKHR = 5055, + CapabilityShaderEnqueueAMDX = 5067, + CapabilityQuadControlKHR = 5087, + CapabilityInt4TypeINTEL = 5112, + CapabilityInt4CooperativeMatrixINTEL = 5114, + CapabilityBFloat16TypeKHR = 5116, + CapabilityBFloat16DotProductKHR = 5117, + CapabilityBFloat16CooperativeMatrixKHR = 5118, CapabilitySampleMaskOverrideCoverageNV = 5249, CapabilityGeometryShaderPassthroughNV = 5251, CapabilityShaderViewportIndexLayerEXT = 5254, @@ -1014,6 +1131,7 @@ enum Capability { CapabilityMeshShadingEXT = 5283, CapabilityFragmentBarycentricKHR = 5284, CapabilityFragmentBarycentricNV = 5284, + CapabilityComputeDerivativeGroupQuadsKHR = 5288, CapabilityComputeDerivativeGroupQuadsNV = 5288, CapabilityFragmentDensityEXT = 5291, CapabilityShadingRateNV = 5291, @@ -1042,6 +1160,7 @@ enum Capability { CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311, CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312, CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312, + CapabilityRayTracingPositionFetchKHR = 5336, CapabilityRayTracingNV = 5340, CapabilityRayTracingMotionBlurNV = 5341, CapabilityVulkanMemoryModel = 5345, @@ -1050,6 +1169,7 @@ enum Capability { CapabilityVulkanMemoryModelDeviceScopeKHR = 5346, CapabilityPhysicalStorageBufferAddresses = 5347, CapabilityPhysicalStorageBufferAddressesEXT = 5347, + CapabilityComputeDerivativeGroupLinearKHR = 5350, CapabilityComputeDerivativeGroupLinearNV = 5350, CapabilityRayTracingProvisionalKHR = 5353, CapabilityCooperativeMatrixNV = 5357, @@ -1059,7 +1179,25 @@ enum Capability { CapabilityFragmentShaderPixelInterlockEXT = 5378, CapabilityDemoteToHelperInvocation = 5379, CapabilityDemoteToHelperInvocationEXT = 5379, + CapabilityDisplacementMicromapNV = 5380, + CapabilityRayTracingOpacityMicromapEXT = 5381, + CapabilityShaderInvocationReorderNV = 5383, CapabilityBindlessTextureNV = 5390, + CapabilityRayQueryPositionFetchKHR = 5391, + CapabilityCooperativeVectorNV = 5394, + CapabilityAtomicFloat16VectorNV = 5404, + CapabilityRayTracingDisplacementMicromapNV = 5409, + CapabilityRawAccessChainsNV = 5414, + CapabilityRayTracingSpheresGeometryNV = 5418, + CapabilityRayTracingLinearSweptSpheresGeometryNV = 5419, + CapabilityCooperativeMatrixReductionsNV = 5430, + CapabilityCooperativeMatrixConversionsNV = 5431, + CapabilityCooperativeMatrixPerElementOperationsNV = 5432, + CapabilityCooperativeMatrixTensorAddressingNV = 5433, + CapabilityCooperativeMatrixBlockLoadsNV = 5434, + CapabilityCooperativeVectorTrainingNV = 5435, + CapabilityRayTracingClusterAccelerationStructureNV = 5437, + CapabilityTensorAddressingNV = 5439, CapabilitySubgroupShuffleINTEL = 5568, CapabilitySubgroupBufferBlockIOINTEL = 5569, CapabilitySubgroupImageBlockIOINTEL = 5570, @@ -1092,10 +1230,13 @@ enum Capability { CapabilityFPGAMemoryAccessesINTEL = 5898, CapabilityFPGAClusterAttributesINTEL = 5904, CapabilityLoopFuseINTEL = 5906, + CapabilityFPGADSPControlINTEL = 5908, CapabilityMemoryAccessAliasingINTEL = 5910, + CapabilityFPGAInvocationPipeliningAttributesINTEL = 5916, CapabilityFPGABufferLocationINTEL = 5920, CapabilityArbitraryPrecisionFixedPointINTEL = 5922, CapabilityUSMStorageClassesINTEL = 5935, + CapabilityRuntimeAlignedAttributeINTEL = 5939, 
CapabilityIOPipesINTEL = 5943, CapabilityBlockingPipesINTEL = 5945, CapabilityFPGARegINTEL = 5948, @@ -1108,16 +1249,41 @@ enum Capability { CapabilityDotProduct = 6019, CapabilityDotProductKHR = 6019, CapabilityRayCullMaskKHR = 6020, + CapabilityCooperativeMatrixKHR = 6022, + CapabilityReplicatedCompositesEXT = 6024, CapabilityBitInstructions = 6025, CapabilityGroupNonUniformRotateKHR = 6026, + CapabilityFloatControls2 = 6029, CapabilityAtomicFloat32AddEXT = 6033, CapabilityAtomicFloat64AddEXT = 6034, - CapabilityLongConstantCompositeINTEL = 6089, + CapabilityLongCompositesINTEL = 6089, + CapabilityOptNoneEXT = 6094, CapabilityOptNoneINTEL = 6094, CapabilityAtomicFloat16AddEXT = 6095, CapabilityDebugInfoModuleINTEL = 6114, + CapabilityBFloat16ConversionINTEL = 6115, CapabilitySplitBarrierINTEL = 6141, + CapabilityArithmeticFenceEXT = 6144, + CapabilityFPGAClusterAttributesV2INTEL = 6150, + CapabilityFPGAKernelAttributesv2INTEL = 6161, + CapabilityTaskSequenceINTEL = 6162, + CapabilityFPMaxErrorINTEL = 6169, + CapabilityFPGALatencyControlINTEL = 6171, + CapabilityFPGAArgumentInterfacesINTEL = 6174, + CapabilityGlobalVariableHostAccessINTEL = 6187, + CapabilityGlobalVariableFPGADecorationsINTEL = 6189, + CapabilitySubgroupBufferPrefetchINTEL = 6220, + CapabilitySubgroup2DBlockIOINTEL = 6228, + CapabilitySubgroup2DBlockTransformINTEL = 6229, + CapabilitySubgroup2DBlockTransposeINTEL = 6230, + CapabilitySubgroupMatrixMultiplyAccumulateINTEL = 6236, + CapabilityTernaryBitwiseFunctionINTEL = 6241, CapabilityGroupUniformArithmeticKHR = 6400, + CapabilityTensorFloat32RoundingINTEL = 6425, + CapabilityMaskedGatherScatterINTEL = 6427, + CapabilityCacheControlsINTEL = 6441, + CapabilityRegisterLimitsINTEL = 6460, + CapabilityBindlessImagesINTEL = 6528, CapabilityMax = 0x7fffffff, }; @@ -1130,8 +1296,10 @@ enum RayFlagsShift { RayFlagsCullFrontFacingTrianglesKHRShift = 5, RayFlagsCullOpaqueKHRShift = 6, RayFlagsCullNoOpaqueKHRShift = 7, + RayFlagsSkipBuiltinPrimitivesNVShift = 8, RayFlagsSkipTrianglesKHRShift = 8, RayFlagsSkipAABBsKHRShift = 9, + RayFlagsForceOpacityMicromap2StateEXTShift = 10, RayFlagsMax = 0x7fffffff, }; @@ -1145,8 +1313,10 @@ enum RayFlagsMask { RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020, RayFlagsCullOpaqueKHRMask = 0x00000040, RayFlagsCullNoOpaqueKHRMask = 0x00000080, + RayFlagsSkipBuiltinPrimitivesNVMask = 0x00000100, RayFlagsSkipTrianglesKHRMask = 0x00000100, RayFlagsSkipAABBsKHRMask = 0x00000200, + RayFlagsForceOpacityMicromap2StateEXTMask = 0x00000400, }; enum RayQueryIntersection { @@ -1222,6 +1392,210 @@ enum PackedVectorFormat { PackedVectorFormatMax = 0x7fffffff, }; +enum CooperativeMatrixOperandsShift { + CooperativeMatrixOperandsMatrixASignedComponentsKHRShift = 0, + CooperativeMatrixOperandsMatrixBSignedComponentsKHRShift = 1, + CooperativeMatrixOperandsMatrixCSignedComponentsKHRShift = 2, + CooperativeMatrixOperandsMatrixResultSignedComponentsKHRShift = 3, + CooperativeMatrixOperandsSaturatingAccumulationKHRShift = 4, + CooperativeMatrixOperandsMax = 0x7fffffff, +}; + +enum CooperativeMatrixOperandsMask { + CooperativeMatrixOperandsMaskNone = 0, + CooperativeMatrixOperandsMatrixASignedComponentsKHRMask = 0x00000001, + CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask = 0x00000002, + CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask = 0x00000004, + CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask = 0x00000008, + CooperativeMatrixOperandsSaturatingAccumulationKHRMask = 0x00000010, +}; + +enum CooperativeMatrixLayout { + 
CooperativeMatrixLayoutRowMajorKHR = 0, + CooperativeMatrixLayoutColumnMajorKHR = 1, + CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202, + CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203, + CooperativeMatrixLayoutMax = 0x7fffffff, +}; + +enum CooperativeMatrixUse { + CooperativeMatrixUseMatrixAKHR = 0, + CooperativeMatrixUseMatrixBKHR = 1, + CooperativeMatrixUseMatrixAccumulatorKHR = 2, + CooperativeMatrixUseMax = 0x7fffffff, +}; + +enum CooperativeMatrixReduceShift { + CooperativeMatrixReduceRowShift = 0, + CooperativeMatrixReduceColumnShift = 1, + CooperativeMatrixReduce2x2Shift = 2, + CooperativeMatrixReduceMax = 0x7fffffff, +}; + +enum CooperativeMatrixReduceMask { + CooperativeMatrixReduceMaskNone = 0, + CooperativeMatrixReduceRowMask = 0x00000001, + CooperativeMatrixReduceColumnMask = 0x00000002, + CooperativeMatrixReduce2x2Mask = 0x00000004, +}; + +enum TensorClampMode { + TensorClampModeUndefined = 0, + TensorClampModeConstant = 1, + TensorClampModeClampToEdge = 2, + TensorClampModeRepeat = 3, + TensorClampModeRepeatMirrored = 4, + TensorClampModeMax = 0x7fffffff, +}; + +enum TensorAddressingOperandsShift { + TensorAddressingOperandsTensorViewShift = 0, + TensorAddressingOperandsDecodeFuncShift = 1, + TensorAddressingOperandsMax = 0x7fffffff, +}; + +enum TensorAddressingOperandsMask { + TensorAddressingOperandsMaskNone = 0, + TensorAddressingOperandsTensorViewMask = 0x00000001, + TensorAddressingOperandsDecodeFuncMask = 0x00000002, +}; + +enum TensorOperandsShift { + TensorOperandsNontemporalARMShift = 0, + TensorOperandsOutOfBoundsValueARMShift = 1, + TensorOperandsMakeElementAvailableARMShift = 2, + TensorOperandsMakeElementVisibleARMShift = 3, + TensorOperandsNonPrivateElementARMShift = 4, + TensorOperandsMax = 0x7fffffff, +}; + +enum TensorOperandsMask { + TensorOperandsMaskNone = 0, + TensorOperandsNontemporalARMMask = 0x00000001, + TensorOperandsOutOfBoundsValueARMMask = 0x00000002, + TensorOperandsMakeElementAvailableARMMask = 0x00000004, + TensorOperandsMakeElementVisibleARMMask = 0x00000008, + TensorOperandsNonPrivateElementARMMask = 0x00000010, +}; + +enum InitializationModeQualifier { + InitializationModeQualifierInitOnDeviceReprogramINTEL = 0, + InitializationModeQualifierInitOnDeviceResetINTEL = 1, + InitializationModeQualifierMax = 0x7fffffff, +}; + +enum HostAccessQualifier { + HostAccessQualifierNoneINTEL = 0, + HostAccessQualifierReadINTEL = 1, + HostAccessQualifierWriteINTEL = 2, + HostAccessQualifierReadWriteINTEL = 3, + HostAccessQualifierMax = 0x7fffffff, +}; + +enum LoadCacheControl { + LoadCacheControlUncachedINTEL = 0, + LoadCacheControlCachedINTEL = 1, + LoadCacheControlStreamingINTEL = 2, + LoadCacheControlInvalidateAfterReadINTEL = 3, + LoadCacheControlConstCachedINTEL = 4, + LoadCacheControlMax = 0x7fffffff, +}; + +enum StoreCacheControl { + StoreCacheControlUncachedINTEL = 0, + StoreCacheControlWriteThroughINTEL = 1, + StoreCacheControlWriteBackINTEL = 2, + StoreCacheControlStreamingINTEL = 3, + StoreCacheControlMax = 0x7fffffff, +}; + +enum NamedMaximumNumberOfRegisters { + NamedMaximumNumberOfRegistersAutoINTEL = 0, + NamedMaximumNumberOfRegistersMax = 0x7fffffff, +}; + +enum MatrixMultiplyAccumulateOperandsShift { + MatrixMultiplyAccumulateOperandsMatrixASignedComponentsINTELShift = 0, + MatrixMultiplyAccumulateOperandsMatrixBSignedComponentsINTELShift = 1, + MatrixMultiplyAccumulateOperandsMatrixCBFloat16INTELShift = 2, + MatrixMultiplyAccumulateOperandsMatrixResultBFloat16INTELShift = 3, + 
MatrixMultiplyAccumulateOperandsMatrixAPackedInt8INTELShift = 4, + MatrixMultiplyAccumulateOperandsMatrixBPackedInt8INTELShift = 5, + MatrixMultiplyAccumulateOperandsMatrixAPackedInt4INTELShift = 6, + MatrixMultiplyAccumulateOperandsMatrixBPackedInt4INTELShift = 7, + MatrixMultiplyAccumulateOperandsMatrixATF32INTELShift = 8, + MatrixMultiplyAccumulateOperandsMatrixBTF32INTELShift = 9, + MatrixMultiplyAccumulateOperandsMatrixAPackedFloat16INTELShift = 10, + MatrixMultiplyAccumulateOperandsMatrixBPackedFloat16INTELShift = 11, + MatrixMultiplyAccumulateOperandsMatrixAPackedBFloat16INTELShift = 12, + MatrixMultiplyAccumulateOperandsMatrixBPackedBFloat16INTELShift = 13, + MatrixMultiplyAccumulateOperandsMax = 0x7fffffff, +}; + +enum MatrixMultiplyAccumulateOperandsMask { + MatrixMultiplyAccumulateOperandsMaskNone = 0, + MatrixMultiplyAccumulateOperandsMatrixASignedComponentsINTELMask = 0x00000001, + MatrixMultiplyAccumulateOperandsMatrixBSignedComponentsINTELMask = 0x00000002, + MatrixMultiplyAccumulateOperandsMatrixCBFloat16INTELMask = 0x00000004, + MatrixMultiplyAccumulateOperandsMatrixResultBFloat16INTELMask = 0x00000008, + MatrixMultiplyAccumulateOperandsMatrixAPackedInt8INTELMask = 0x00000010, + MatrixMultiplyAccumulateOperandsMatrixBPackedInt8INTELMask = 0x00000020, + MatrixMultiplyAccumulateOperandsMatrixAPackedInt4INTELMask = 0x00000040, + MatrixMultiplyAccumulateOperandsMatrixBPackedInt4INTELMask = 0x00000080, + MatrixMultiplyAccumulateOperandsMatrixATF32INTELMask = 0x00000100, + MatrixMultiplyAccumulateOperandsMatrixBTF32INTELMask = 0x00000200, + MatrixMultiplyAccumulateOperandsMatrixAPackedFloat16INTELMask = 0x00000400, + MatrixMultiplyAccumulateOperandsMatrixBPackedFloat16INTELMask = 0x00000800, + MatrixMultiplyAccumulateOperandsMatrixAPackedBFloat16INTELMask = 0x00001000, + MatrixMultiplyAccumulateOperandsMatrixBPackedBFloat16INTELMask = 0x00002000, +}; + +enum RawAccessChainOperandsShift { + RawAccessChainOperandsRobustnessPerComponentNVShift = 0, + RawAccessChainOperandsRobustnessPerElementNVShift = 1, + RawAccessChainOperandsMax = 0x7fffffff, +}; + +enum RawAccessChainOperandsMask { + RawAccessChainOperandsMaskNone = 0, + RawAccessChainOperandsRobustnessPerComponentNVMask = 0x00000001, + RawAccessChainOperandsRobustnessPerElementNVMask = 0x00000002, +}; + +enum FPEncoding { + FPEncodingBFloat16KHR = 0, + FPEncodingFloat8E4M3EXT = 4214, + FPEncodingFloat8E5M2EXT = 4215, + FPEncodingMax = 0x7fffffff, +}; + +enum CooperativeVectorMatrixLayout { + CooperativeVectorMatrixLayoutRowMajorNV = 0, + CooperativeVectorMatrixLayoutColumnMajorNV = 1, + CooperativeVectorMatrixLayoutInferencingOptimalNV = 2, + CooperativeVectorMatrixLayoutTrainingOptimalNV = 3, + CooperativeVectorMatrixLayoutMax = 0x7fffffff, +}; + +enum ComponentType { + ComponentTypeFloat16NV = 0, + ComponentTypeFloat32NV = 1, + ComponentTypeFloat64NV = 2, + ComponentTypeSignedInt8NV = 3, + ComponentTypeSignedInt16NV = 4, + ComponentTypeSignedInt32NV = 5, + ComponentTypeSignedInt64NV = 6, + ComponentTypeUnsignedInt8NV = 7, + ComponentTypeUnsignedInt16NV = 8, + ComponentTypeUnsignedInt32NV = 9, + ComponentTypeUnsignedInt64NV = 10, + ComponentTypeSignedInt8PackedNV = 1000491000, + ComponentTypeUnsignedInt8PackedNV = 1000491001, + ComponentTypeFloatE4M3NV = 1000491002, + ComponentTypeFloatE5M2NV = 1000491003, + ComponentTypeMax = 0x7fffffff, +}; + enum Op { OpNop = 0, OpUndef = 1, @@ -1567,14 +1941,37 @@ enum Op { OpPtrEqual = 401, OpPtrNotEqual = 402, OpPtrDiff = 403, + OpColorAttachmentReadEXT = 4160, + 
OpDepthAttachmentReadEXT = 4161, + OpStencilAttachmentReadEXT = 4162, + OpTypeTensorARM = 4163, + OpTensorReadARM = 4164, + OpTensorWriteARM = 4165, + OpTensorQuerySizeARM = 4166, + OpGraphConstantARM = 4181, + OpGraphEntryPointARM = 4182, + OpGraphARM = 4183, + OpGraphInputARM = 4184, + OpGraphSetOutputARM = 4185, + OpGraphEndARM = 4186, + OpTypeGraphARM = 4190, OpTerminateInvocation = 4416, + OpTypeUntypedPointerKHR = 4417, + OpUntypedVariableKHR = 4418, + OpUntypedAccessChainKHR = 4419, + OpUntypedInBoundsAccessChainKHR = 4420, OpSubgroupBallotKHR = 4421, OpSubgroupFirstInvocationKHR = 4422, + OpUntypedPtrAccessChainKHR = 4423, + OpUntypedInBoundsPtrAccessChainKHR = 4424, + OpUntypedArrayLengthKHR = 4425, + OpUntypedPrefetchKHR = 4426, OpSubgroupAllKHR = 4428, OpSubgroupAnyKHR = 4429, OpSubgroupAllEqualKHR = 4430, OpGroupNonUniformRotateKHR = 4431, OpSubgroupReadInvocationKHR = 4432, + OpExtInstWithForwardRefsKHR = 4433, OpTraceRayKHR = 4445, OpExecuteCallableKHR = 4446, OpConvertUToAccelerationStructureKHR = 4447, @@ -1592,6 +1989,14 @@ enum Op { OpUDotAccSatKHR = 4454, OpSUDotAccSat = 4455, OpSUDotAccSatKHR = 4455, + OpTypeCooperativeMatrixKHR = 4456, + OpCooperativeMatrixLoadKHR = 4457, + OpCooperativeMatrixStoreKHR = 4458, + OpCooperativeMatrixMulAddKHR = 4459, + OpCooperativeMatrixLengthKHR = 4460, + OpConstantCompositeReplicateEXT = 4461, + OpSpecConstantCompositeReplicateEXT = 4462, + OpCompositeConstructReplicateEXT = 4463, OpTypeRayQueryKHR = 4472, OpRayQueryInitializeKHR = 4473, OpRayQueryTerminateKHR = 4474, @@ -1618,11 +2023,64 @@ enum Op { OpFragmentMaskFetchAMD = 5011, OpFragmentFetchAMD = 5012, OpReadClockKHR = 5056, + OpAllocateNodePayloadsAMDX = 5074, + OpEnqueueNodePayloadsAMDX = 5075, + OpTypeNodePayloadArrayAMDX = 5076, + OpFinishWritingNodePayloadAMDX = 5078, + OpNodePayloadArrayLengthAMDX = 5090, + OpIsNodePayloadValidAMDX = 5101, + OpConstantStringAMDX = 5103, + OpSpecConstantStringAMDX = 5104, + OpGroupNonUniformQuadAllKHR = 5110, + OpGroupNonUniformQuadAnyKHR = 5111, + OpHitObjectRecordHitMotionNV = 5249, + OpHitObjectRecordHitWithIndexMotionNV = 5250, + OpHitObjectRecordMissMotionNV = 5251, + OpHitObjectGetWorldToObjectNV = 5252, + OpHitObjectGetObjectToWorldNV = 5253, + OpHitObjectGetObjectRayDirectionNV = 5254, + OpHitObjectGetObjectRayOriginNV = 5255, + OpHitObjectTraceRayMotionNV = 5256, + OpHitObjectGetShaderRecordBufferHandleNV = 5257, + OpHitObjectGetShaderBindingTableRecordIndexNV = 5258, + OpHitObjectRecordEmptyNV = 5259, + OpHitObjectTraceRayNV = 5260, + OpHitObjectRecordHitNV = 5261, + OpHitObjectRecordHitWithIndexNV = 5262, + OpHitObjectRecordMissNV = 5263, + OpHitObjectExecuteShaderNV = 5264, + OpHitObjectGetCurrentTimeNV = 5265, + OpHitObjectGetAttributesNV = 5266, + OpHitObjectGetHitKindNV = 5267, + OpHitObjectGetPrimitiveIndexNV = 5268, + OpHitObjectGetGeometryIndexNV = 5269, + OpHitObjectGetInstanceIdNV = 5270, + OpHitObjectGetInstanceCustomIndexNV = 5271, + OpHitObjectGetWorldRayDirectionNV = 5272, + OpHitObjectGetWorldRayOriginNV = 5273, + OpHitObjectGetRayTMaxNV = 5274, + OpHitObjectGetRayTMinNV = 5275, + OpHitObjectIsEmptyNV = 5276, + OpHitObjectIsHitNV = 5277, + OpHitObjectIsMissNV = 5278, + OpReorderThreadWithHitObjectNV = 5279, + OpReorderThreadWithHintNV = 5280, + OpTypeHitObjectNV = 5281, OpImageSampleFootprintNV = 5283, + OpTypeCooperativeVectorNV = 5288, + OpCooperativeVectorMatrixMulNV = 5289, + OpCooperativeVectorOuterProductAccumulateNV = 5290, + OpCooperativeVectorReduceSumAccumulateNV = 5291, + OpCooperativeVectorMatrixMulAddNV 
= 5292, + OpCooperativeMatrixConvertNV = 5293, OpEmitMeshTasksEXT = 5294, OpSetMeshOutputsEXT = 5295, OpGroupNonUniformPartitionNV = 5296, OpWritePackedPrimitiveIndices4x8NV = 5299, + OpFetchMicroTriangleVertexPositionNV = 5300, + OpFetchMicroTriangleVertexBarycentricNV = 5301, + OpCooperativeVectorLoadNV = 5302, + OpCooperativeVectorStoreNV = 5303, OpReportIntersectionKHR = 5334, OpReportIntersectionNV = 5334, OpIgnoreIntersectionNV = 5335, @@ -1630,9 +2088,12 @@ enum Op { OpTraceNV = 5337, OpTraceMotionNV = 5338, OpTraceRayMotionNV = 5339, + OpRayQueryGetIntersectionTriangleVertexPositionsKHR = 5340, OpTypeAccelerationStructureKHR = 5341, OpTypeAccelerationStructureNV = 5341, OpExecuteCallableNV = 5344, + OpRayQueryGetClusterIdNV = 5345, + OpHitObjectGetClusterIdNV = 5346, OpTypeCooperativeMatrixNV = 5358, OpCooperativeMatrixLoadNV = 5359, OpCooperativeMatrixStoreNV = 5360, @@ -1640,9 +2101,26 @@ enum Op { OpCooperativeMatrixLengthNV = 5362, OpBeginInvocationInterlockEXT = 5364, OpEndInvocationInterlockEXT = 5365, + OpCooperativeMatrixReduceNV = 5366, + OpCooperativeMatrixLoadTensorNV = 5367, + OpCooperativeMatrixStoreTensorNV = 5368, + OpCooperativeMatrixPerElementOpNV = 5369, + OpTypeTensorLayoutNV = 5370, + OpTypeTensorViewNV = 5371, + OpCreateTensorLayoutNV = 5372, + OpTensorLayoutSetDimensionNV = 5373, + OpTensorLayoutSetStrideNV = 5374, + OpTensorLayoutSliceNV = 5375, + OpTensorLayoutSetClampValueNV = 5376, + OpCreateTensorViewNV = 5377, + OpTensorViewSetDimensionNV = 5378, + OpTensorViewSetStrideNV = 5379, OpDemoteToHelperInvocation = 5380, OpDemoteToHelperInvocationEXT = 5380, OpIsHelperInvocationEXT = 5381, + OpTensorViewSetClipNV = 5382, + OpTensorLayoutSetBlockSizeNV = 5384, + OpCooperativeMatrixTransposeNV = 5390, OpConvertUToImageNV = 5391, OpConvertUToSamplerNV = 5392, OpConvertImageToUNV = 5393, @@ -1650,6 +2128,20 @@ enum Op { OpConvertUToSampledImageNV = 5395, OpConvertSampledImageToUNV = 5396, OpSamplerImageAddressingModeNV = 5397, + OpRawAccessChainNV = 5398, + OpRayQueryGetIntersectionSpherePositionNV = 5427, + OpRayQueryGetIntersectionSphereRadiusNV = 5428, + OpRayQueryGetIntersectionLSSPositionsNV = 5429, + OpRayQueryGetIntersectionLSSRadiiNV = 5430, + OpRayQueryGetIntersectionLSSHitValueNV = 5431, + OpHitObjectGetSpherePositionNV = 5432, + OpHitObjectGetSphereRadiusNV = 5433, + OpHitObjectGetLSSPositionsNV = 5434, + OpHitObjectGetLSSRadiiNV = 5435, + OpHitObjectIsSphereHitNV = 5436, + OpHitObjectIsLSSHitNV = 5437, + OpRayQueryIsSphereHitNV = 5438, + OpRayQueryIsLSSHitNV = 5439, OpSubgroupShuffleINTEL = 5571, OpSubgroupShuffleDownINTEL = 5572, OpSubgroupShuffleUpINTEL = 5573, @@ -1891,8 +2383,25 @@ enum Op { OpTypeStructContinuedINTEL = 6090, OpConstantCompositeContinuedINTEL = 6091, OpSpecConstantCompositeContinuedINTEL = 6092, + OpCompositeConstructContinuedINTEL = 6096, + OpConvertFToBF16INTEL = 6116, + OpConvertBF16ToFINTEL = 6117, OpControlBarrierArriveINTEL = 6142, OpControlBarrierWaitINTEL = 6143, + OpArithmeticFenceEXT = 6145, + OpTaskSequenceCreateINTEL = 6163, + OpTaskSequenceAsyncINTEL = 6164, + OpTaskSequenceGetINTEL = 6165, + OpTaskSequenceReleaseINTEL = 6166, + OpTypeTaskSequenceINTEL = 6199, + OpSubgroupBlockPrefetchINTEL = 6221, + OpSubgroup2DBlockLoadINTEL = 6231, + OpSubgroup2DBlockLoadTransformINTEL = 6232, + OpSubgroup2DBlockLoadTransposeINTEL = 6233, + OpSubgroup2DBlockPrefetchINTEL = 6234, + OpSubgroup2DBlockStoreINTEL = 6235, + OpSubgroupMatrixMultiplyAccumulateINTEL = 6237, + OpBitwiseFunctionINTEL = 6242, OpGroupIMulKHR = 6401, 
OpGroupFMulKHR = 6402, OpGroupBitwiseAndKHR = 6403, @@ -1901,6 +2410,12 @@ enum Op { OpGroupLogicalAndKHR = 6406, OpGroupLogicalOrKHR = 6407, OpGroupLogicalXorKHR = 6408, + OpRoundFToTF32INTEL = 6426, + OpMaskedGatherINTEL = 6428, + OpMaskedScatterINTEL = 6429, + OpConvertHandleToImageINTEL = 6529, + OpConvertHandleToSamplerINTEL = 6530, + OpConvertHandleToSampledImageINTEL = 6531, OpMax = 0x7fffffff, }; @@ -2256,14 +2771,37 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpPtrEqual: *hasResult = true; *hasResultType = true; break; case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break; case OpPtrDiff: *hasResult = true; *hasResultType = true; break; + case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break; + case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break; + case OpTensorReadARM: *hasResult = true; *hasResultType = true; break; + case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break; + case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break; + case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break; + case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break; + case OpGraphARM: *hasResult = true; *hasResultType = true; break; + case OpGraphInputARM: *hasResult = true; *hasResultType = true; break; + case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break; + case OpGraphEndARM: *hasResult = false; *hasResultType = false; break; + case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break; case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break; + case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break; + case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break; + case OpUntypedAccessChainKHR: *hasResult = true; *hasResultType = true; break; + case OpUntypedInBoundsAccessChainKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpUntypedPtrAccessChainKHR: *hasResult = true; *hasResultType = true; break; + case OpUntypedInBoundsPtrAccessChainKHR: *hasResult = true; *hasResultType = true; break; + case OpUntypedArrayLengthKHR: *hasResult = true; *hasResultType = true; break; + case OpUntypedPrefetchKHR: *hasResult = false; *hasResultType = false; break; case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break; case OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break; case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break; + case OpExtInstWithForwardRefsKHR: *hasResult = true; *hasResultType = true; break; case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break; case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break; case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break; @@ -2275,6 +2813,14 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpSDotAccSat: *hasResult = true; 
*hasResultType = true; break; case OpUDotAccSat: *hasResult = true; *hasResultType = true; break; case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break; + case OpTypeCooperativeMatrixKHR: *hasResult = true; *hasResultType = false; break; + case OpCooperativeMatrixLoadKHR: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreKHR: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixMulAddKHR: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLengthKHR: *hasResult = true; *hasResultType = true; break; + case OpConstantCompositeReplicateEXT: *hasResult = true; *hasResultType = true; break; + case OpSpecConstantCompositeReplicateEXT: *hasResult = true; *hasResultType = true; break; + case OpCompositeConstructReplicateEXT: *hasResult = true; *hasResultType = true; break; case OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break; case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break; case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break; @@ -2301,19 +2847,75 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break; case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break; case OpReadClockKHR: *hasResult = true; *hasResultType = true; break; + case OpAllocateNodePayloadsAMDX: *hasResult = true; *hasResultType = true; break; + case OpEnqueueNodePayloadsAMDX: *hasResult = false; *hasResultType = false; break; + case OpTypeNodePayloadArrayAMDX: *hasResult = true; *hasResultType = false; break; + case OpFinishWritingNodePayloadAMDX: *hasResult = true; *hasResultType = true; break; + case OpNodePayloadArrayLengthAMDX: *hasResult = true; *hasResultType = true; break; + case OpIsNodePayloadValidAMDX: *hasResult = true; *hasResultType = true; break; + case OpConstantStringAMDX: *hasResult = true; *hasResultType = false; break; + case OpSpecConstantStringAMDX: *hasResult = true; *hasResultType = false; break; + case OpGroupNonUniformQuadAllKHR: *hasResult = true; *hasResultType = true; break; + case OpGroupNonUniformQuadAnyKHR: *hasResult = true; *hasResultType = true; break; + case OpHitObjectRecordHitMotionNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectRecordHitWithIndexMotionNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectRecordMissMotionNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectGetWorldToObjectNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetObjectToWorldNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetObjectRayDirectionNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetObjectRayOriginNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectGetShaderRecordBufferHandleNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetShaderBindingTableRecordIndexNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectRecordEmptyNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectTraceRayNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectRecordHitNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectRecordHitWithIndexNV: *hasResult = false; *hasResultType = false; break; + case 
OpHitObjectRecordMissNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectExecuteShaderNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectGetCurrentTimeNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetAttributesNV: *hasResult = false; *hasResultType = false; break; + case OpHitObjectGetHitKindNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetPrimitiveIndexNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetGeometryIndexNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetInstanceIdNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetInstanceCustomIndexNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetWorldRayDirectionNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetWorldRayOriginNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetRayTMaxNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetRayTMinNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectIsEmptyNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectIsHitNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectIsMissNV: *hasResult = true; *hasResultType = true; break; + case OpReorderThreadWithHitObjectNV: *hasResult = false; *hasResultType = false; break; + case OpReorderThreadWithHintNV: *hasResult = false; *hasResultType = false; break; + case OpTypeHitObjectNV: *hasResult = true; *hasResultType = false; break; case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break; + case OpTypeCooperativeVectorNV: *hasResult = true; *hasResultType = false; break; + case OpCooperativeVectorMatrixMulNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeVectorOuterProductAccumulateNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeVectorReduceSumAccumulateNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeVectorMatrixMulAddNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixConvertNV: *hasResult = true; *hasResultType = true; break; case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break; case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break; case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break; case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break; - case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break; + case OpFetchMicroTriangleVertexPositionNV: *hasResult = true; *hasResultType = true; break; + case OpFetchMicroTriangleVertexBarycentricNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeVectorLoadNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeVectorStoreNV: *hasResult = false; *hasResultType = false; break; + case OpReportIntersectionKHR: *hasResult = true; *hasResultType = true; break; case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break; case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break; case OpTraceNV: *hasResult = false; *hasResultType = false; break; case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break; case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break; - case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break; + case 
OpRayQueryGetIntersectionTriangleVertexPositionsKHR: *hasResult = true; *hasResultType = true; break; + case OpTypeAccelerationStructureKHR: *hasResult = true; *hasResultType = false; break; case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break; + case OpRayQueryGetClusterIdNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetClusterIdNV: *hasResult = true; *hasResultType = true; break; case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break; case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break; case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break; @@ -2321,8 +2923,25 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break; case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixReduceNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixLoadTensorNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixStoreTensorNV: *hasResult = false; *hasResultType = false; break; + case OpCooperativeMatrixPerElementOpNV: *hasResult = true; *hasResultType = true; break; + case OpTypeTensorLayoutNV: *hasResult = true; *hasResultType = false; break; + case OpTypeTensorViewNV: *hasResult = true; *hasResultType = false; break; + case OpCreateTensorLayoutNV: *hasResult = true; *hasResultType = true; break; + case OpTensorLayoutSetDimensionNV: *hasResult = true; *hasResultType = true; break; + case OpTensorLayoutSetStrideNV: *hasResult = true; *hasResultType = true; break; + case OpTensorLayoutSliceNV: *hasResult = true; *hasResultType = true; break; + case OpTensorLayoutSetClampValueNV: *hasResult = true; *hasResultType = true; break; + case OpCreateTensorViewNV: *hasResult = true; *hasResultType = true; break; + case OpTensorViewSetDimensionNV: *hasResult = true; *hasResultType = true; break; + case OpTensorViewSetStrideNV: *hasResult = true; *hasResultType = true; break; case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break; case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break; + case OpTensorViewSetClipNV: *hasResult = true; *hasResultType = true; break; + case OpTensorLayoutSetBlockSizeNV: *hasResult = true; *hasResultType = true; break; + case OpCooperativeMatrixTransposeNV: *hasResult = true; *hasResultType = true; break; case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break; case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break; case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break; @@ -2330,6 +2949,20 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break; case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break; case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break; + case OpRawAccessChainNV: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionSpherePositionNV: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionSphereRadiusNV: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionLSSPositionsNV: *hasResult = true; 
*hasResultType = true; break; + case OpRayQueryGetIntersectionLSSRadiiNV: *hasResult = true; *hasResultType = true; break; + case OpRayQueryGetIntersectionLSSHitValueNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetSpherePositionNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetSphereRadiusNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetLSSPositionsNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectGetLSSRadiiNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectIsSphereHitNV: *hasResult = true; *hasResultType = true; break; + case OpHitObjectIsLSSHitNV: *hasResult = true; *hasResultType = true; break; + case OpRayQueryIsSphereHitNV: *hasResult = true; *hasResultType = true; break; + case OpRayQueryIsLSSHitNV: *hasResult = true; *hasResultType = true; break; case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break; case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break; case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break; @@ -2356,7 +2989,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break; case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break; case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break; - case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break; + case OpAsmTargetINTEL: *hasResult = true; *hasResultType = false; break; case OpAsmINTEL: *hasResult = true; *hasResultType = true; break; case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break; case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break; @@ -2569,8 +3202,25 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break; + case OpCompositeConstructContinuedINTEL: *hasResult = true; *hasResultType = true; break; + case OpConvertFToBF16INTEL: *hasResult = true; *hasResultType = true; break; + case OpConvertBF16ToFINTEL: *hasResult = true; *hasResultType = true; break; case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break; case OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break; + case OpArithmeticFenceEXT: *hasResult = true; *hasResultType = true; break; + case OpTaskSequenceCreateINTEL: *hasResult = true; *hasResultType = true; break; + case OpTaskSequenceAsyncINTEL: *hasResult = false; *hasResultType = false; break; + case OpTaskSequenceGetINTEL: *hasResult = true; *hasResultType = true; break; + case OpTaskSequenceReleaseINTEL: *hasResult = false; *hasResultType = false; break; + case OpTypeTaskSequenceINTEL: *hasResult = true; *hasResultType = false; break; + case OpSubgroupBlockPrefetchINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroup2DBlockLoadINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroup2DBlockLoadTransformINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroup2DBlockLoadTransposeINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroup2DBlockPrefetchINTEL: *hasResult = false; *hasResultType = false; 
break; + case OpSubgroup2DBlockStoreINTEL: *hasResult = false; *hasResultType = false; break; + case OpSubgroupMatrixMultiplyAccumulateINTEL: *hasResult = true; *hasResultType = true; break; + case OpBitwiseFunctionINTEL: *hasResult = true; *hasResultType = true; break; case OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break; case OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break; case OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break; @@ -2579,22 +3229,2057 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) { case OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break; case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break; case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break; + case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break; + case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break; + case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break; + case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break; + case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break; + case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break; } } +inline const char* SourceLanguageToString(SourceLanguage value) { + switch (value) { + case SourceLanguageUnknown: return "Unknown"; + case SourceLanguageESSL: return "ESSL"; + case SourceLanguageGLSL: return "GLSL"; + case SourceLanguageOpenCL_C: return "OpenCL_C"; + case SourceLanguageOpenCL_CPP: return "OpenCL_CPP"; + case SourceLanguageHLSL: return "HLSL"; + case SourceLanguageCPP_for_OpenCL: return "CPP_for_OpenCL"; + case SourceLanguageSYCL: return "SYCL"; + case SourceLanguageHERO_C: return "HERO_C"; + case SourceLanguageNZSL: return "NZSL"; + case SourceLanguageWGSL: return "WGSL"; + case SourceLanguageSlang: return "Slang"; + case SourceLanguageZig: return "Zig"; + case SourceLanguageRust: return "Rust"; + default: return "Unknown"; + } +} + +inline const char* ExecutionModelToString(ExecutionModel value) { + switch (value) { + case ExecutionModelVertex: return "Vertex"; + case ExecutionModelTessellationControl: return "TessellationControl"; + case ExecutionModelTessellationEvaluation: return "TessellationEvaluation"; + case ExecutionModelGeometry: return "Geometry"; + case ExecutionModelFragment: return "Fragment"; + case ExecutionModelGLCompute: return "GLCompute"; + case ExecutionModelKernel: return "Kernel"; + case ExecutionModelTaskNV: return "TaskNV"; + case ExecutionModelMeshNV: return "MeshNV"; + case ExecutionModelRayGenerationKHR: return "RayGenerationKHR"; + case ExecutionModelIntersectionKHR: return "IntersectionKHR"; + case ExecutionModelAnyHitKHR: return "AnyHitKHR"; + case ExecutionModelClosestHitKHR: return "ClosestHitKHR"; + case ExecutionModelMissKHR: return "MissKHR"; + case ExecutionModelCallableKHR: return "CallableKHR"; + case ExecutionModelTaskEXT: return "TaskEXT"; + case ExecutionModelMeshEXT: return "MeshEXT"; + default: return "Unknown"; + } +} + +inline const char* AddressingModelToString(AddressingModel value) { + switch (value) { + case AddressingModelLogical: return "Logical"; + case AddressingModelPhysical32: return "Physical32"; + case AddressingModelPhysical64: return "Physical64"; + case AddressingModelPhysicalStorageBuffer64: return "PhysicalStorageBuffer64"; + default: return "Unknown"; + } +} + +inline const char* MemoryModelToString(MemoryModel 
value) { + switch (value) { + case MemoryModelSimple: return "Simple"; + case MemoryModelGLSL450: return "GLSL450"; + case MemoryModelOpenCL: return "OpenCL"; + case MemoryModelVulkan: return "Vulkan"; + default: return "Unknown"; + } +} + +inline const char* ExecutionModeToString(ExecutionMode value) { + switch (value) { + case ExecutionModeInvocations: return "Invocations"; + case ExecutionModeSpacingEqual: return "SpacingEqual"; + case ExecutionModeSpacingFractionalEven: return "SpacingFractionalEven"; + case ExecutionModeSpacingFractionalOdd: return "SpacingFractionalOdd"; + case ExecutionModeVertexOrderCw: return "VertexOrderCw"; + case ExecutionModeVertexOrderCcw: return "VertexOrderCcw"; + case ExecutionModePixelCenterInteger: return "PixelCenterInteger"; + case ExecutionModeOriginUpperLeft: return "OriginUpperLeft"; + case ExecutionModeOriginLowerLeft: return "OriginLowerLeft"; + case ExecutionModeEarlyFragmentTests: return "EarlyFragmentTests"; + case ExecutionModePointMode: return "PointMode"; + case ExecutionModeXfb: return "Xfb"; + case ExecutionModeDepthReplacing: return "DepthReplacing"; + case ExecutionModeDepthGreater: return "DepthGreater"; + case ExecutionModeDepthLess: return "DepthLess"; + case ExecutionModeDepthUnchanged: return "DepthUnchanged"; + case ExecutionModeLocalSize: return "LocalSize"; + case ExecutionModeLocalSizeHint: return "LocalSizeHint"; + case ExecutionModeInputPoints: return "InputPoints"; + case ExecutionModeInputLines: return "InputLines"; + case ExecutionModeInputLinesAdjacency: return "InputLinesAdjacency"; + case ExecutionModeTriangles: return "Triangles"; + case ExecutionModeInputTrianglesAdjacency: return "InputTrianglesAdjacency"; + case ExecutionModeQuads: return "Quads"; + case ExecutionModeIsolines: return "Isolines"; + case ExecutionModeOutputVertices: return "OutputVertices"; + case ExecutionModeOutputPoints: return "OutputPoints"; + case ExecutionModeOutputLineStrip: return "OutputLineStrip"; + case ExecutionModeOutputTriangleStrip: return "OutputTriangleStrip"; + case ExecutionModeVecTypeHint: return "VecTypeHint"; + case ExecutionModeContractionOff: return "ContractionOff"; + case ExecutionModeInitializer: return "Initializer"; + case ExecutionModeFinalizer: return "Finalizer"; + case ExecutionModeSubgroupSize: return "SubgroupSize"; + case ExecutionModeSubgroupsPerWorkgroup: return "SubgroupsPerWorkgroup"; + case ExecutionModeSubgroupsPerWorkgroupId: return "SubgroupsPerWorkgroupId"; + case ExecutionModeLocalSizeId: return "LocalSizeId"; + case ExecutionModeLocalSizeHintId: return "LocalSizeHintId"; + case ExecutionModeNonCoherentColorAttachmentReadEXT: return "NonCoherentColorAttachmentReadEXT"; + case ExecutionModeNonCoherentDepthAttachmentReadEXT: return "NonCoherentDepthAttachmentReadEXT"; + case ExecutionModeNonCoherentStencilAttachmentReadEXT: return "NonCoherentStencilAttachmentReadEXT"; + case ExecutionModeSubgroupUniformControlFlowKHR: return "SubgroupUniformControlFlowKHR"; + case ExecutionModePostDepthCoverage: return "PostDepthCoverage"; + case ExecutionModeDenormPreserve: return "DenormPreserve"; + case ExecutionModeDenormFlushToZero: return "DenormFlushToZero"; + case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve"; + case ExecutionModeRoundingModeRTE: return "RoundingModeRTE"; + case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ"; + case ExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM"; + case ExecutionModeTileShadingRateQCOM: return 
"TileShadingRateQCOM"; + case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD"; + case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT"; + case ExecutionModeCoalescingAMDX: return "CoalescingAMDX"; + case ExecutionModeIsApiEntryAMDX: return "IsApiEntryAMDX"; + case ExecutionModeMaxNodeRecursionAMDX: return "MaxNodeRecursionAMDX"; + case ExecutionModeStaticNumWorkgroupsAMDX: return "StaticNumWorkgroupsAMDX"; + case ExecutionModeShaderIndexAMDX: return "ShaderIndexAMDX"; + case ExecutionModeMaxNumWorkgroupsAMDX: return "MaxNumWorkgroupsAMDX"; + case ExecutionModeStencilRefUnchangedFrontAMD: return "StencilRefUnchangedFrontAMD"; + case ExecutionModeStencilRefGreaterFrontAMD: return "StencilRefGreaterFrontAMD"; + case ExecutionModeStencilRefLessFrontAMD: return "StencilRefLessFrontAMD"; + case ExecutionModeStencilRefUnchangedBackAMD: return "StencilRefUnchangedBackAMD"; + case ExecutionModeStencilRefGreaterBackAMD: return "StencilRefGreaterBackAMD"; + case ExecutionModeStencilRefLessBackAMD: return "StencilRefLessBackAMD"; + case ExecutionModeQuadDerivativesKHR: return "QuadDerivativesKHR"; + case ExecutionModeRequireFullQuadsKHR: return "RequireFullQuadsKHR"; + case ExecutionModeSharesInputWithAMDX: return "SharesInputWithAMDX"; + case ExecutionModeOutputLinesEXT: return "OutputLinesEXT"; + case ExecutionModeOutputPrimitivesEXT: return "OutputPrimitivesEXT"; + case ExecutionModeDerivativeGroupQuadsKHR: return "DerivativeGroupQuadsKHR"; + case ExecutionModeDerivativeGroupLinearKHR: return "DerivativeGroupLinearKHR"; + case ExecutionModeOutputTrianglesEXT: return "OutputTrianglesEXT"; + case ExecutionModePixelInterlockOrderedEXT: return "PixelInterlockOrderedEXT"; + case ExecutionModePixelInterlockUnorderedEXT: return "PixelInterlockUnorderedEXT"; + case ExecutionModeSampleInterlockOrderedEXT: return "SampleInterlockOrderedEXT"; + case ExecutionModeSampleInterlockUnorderedEXT: return "SampleInterlockUnorderedEXT"; + case ExecutionModeShadingRateInterlockOrderedEXT: return "ShadingRateInterlockOrderedEXT"; + case ExecutionModeShadingRateInterlockUnorderedEXT: return "ShadingRateInterlockUnorderedEXT"; + case ExecutionModeSharedLocalMemorySizeINTEL: return "SharedLocalMemorySizeINTEL"; + case ExecutionModeRoundingModeRTPINTEL: return "RoundingModeRTPINTEL"; + case ExecutionModeRoundingModeRTNINTEL: return "RoundingModeRTNINTEL"; + case ExecutionModeFloatingPointModeALTINTEL: return "FloatingPointModeALTINTEL"; + case ExecutionModeFloatingPointModeIEEEINTEL: return "FloatingPointModeIEEEINTEL"; + case ExecutionModeMaxWorkgroupSizeINTEL: return "MaxWorkgroupSizeINTEL"; + case ExecutionModeMaxWorkDimINTEL: return "MaxWorkDimINTEL"; + case ExecutionModeNoGlobalOffsetINTEL: return "NoGlobalOffsetINTEL"; + case ExecutionModeNumSIMDWorkitemsINTEL: return "NumSIMDWorkitemsINTEL"; + case ExecutionModeSchedulerTargetFmaxMhzINTEL: return "SchedulerTargetFmaxMhzINTEL"; + case ExecutionModeMaximallyReconvergesKHR: return "MaximallyReconvergesKHR"; + case ExecutionModeFPFastMathDefault: return "FPFastMathDefault"; + case ExecutionModeStreamingInterfaceINTEL: return "StreamingInterfaceINTEL"; + case ExecutionModeRegisterMapInterfaceINTEL: return "RegisterMapInterfaceINTEL"; + case ExecutionModeNamedBarrierCountINTEL: return "NamedBarrierCountINTEL"; + case ExecutionModeMaximumRegistersINTEL: return "MaximumRegistersINTEL"; + case ExecutionModeMaximumRegistersIdINTEL: return "MaximumRegistersIdINTEL"; + case ExecutionModeNamedMaximumRegistersINTEL: return 
"NamedMaximumRegistersINTEL"; + default: return "Unknown"; + } +} + +inline const char* StorageClassToString(StorageClass value) { + switch (value) { + case StorageClassUniformConstant: return "UniformConstant"; + case StorageClassInput: return "Input"; + case StorageClassUniform: return "Uniform"; + case StorageClassOutput: return "Output"; + case StorageClassWorkgroup: return "Workgroup"; + case StorageClassCrossWorkgroup: return "CrossWorkgroup"; + case StorageClassPrivate: return "Private"; + case StorageClassFunction: return "Function"; + case StorageClassGeneric: return "Generic"; + case StorageClassPushConstant: return "PushConstant"; + case StorageClassAtomicCounter: return "AtomicCounter"; + case StorageClassImage: return "Image"; + case StorageClassStorageBuffer: return "StorageBuffer"; + case StorageClassTileImageEXT: return "TileImageEXT"; + case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM"; + case StorageClassNodePayloadAMDX: return "NodePayloadAMDX"; + case StorageClassCallableDataKHR: return "CallableDataKHR"; + case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR"; + case StorageClassRayPayloadKHR: return "RayPayloadKHR"; + case StorageClassHitAttributeKHR: return "HitAttributeKHR"; + case StorageClassIncomingRayPayloadKHR: return "IncomingRayPayloadKHR"; + case StorageClassShaderRecordBufferKHR: return "ShaderRecordBufferKHR"; + case StorageClassPhysicalStorageBuffer: return "PhysicalStorageBuffer"; + case StorageClassHitObjectAttributeNV: return "HitObjectAttributeNV"; + case StorageClassTaskPayloadWorkgroupEXT: return "TaskPayloadWorkgroupEXT"; + case StorageClassCodeSectionINTEL: return "CodeSectionINTEL"; + case StorageClassDeviceOnlyINTEL: return "DeviceOnlyINTEL"; + case StorageClassHostOnlyINTEL: return "HostOnlyINTEL"; + default: return "Unknown"; + } +} + +inline const char* DimToString(Dim value) { + switch (value) { + case Dim1D: return "1D"; + case Dim2D: return "2D"; + case Dim3D: return "3D"; + case DimCube: return "Cube"; + case DimRect: return "Rect"; + case DimBuffer: return "Buffer"; + case DimSubpassData: return "SubpassData"; + case DimTileImageDataEXT: return "TileImageDataEXT"; + default: return "Unknown"; + } +} + +inline const char* SamplerAddressingModeToString(SamplerAddressingMode value) { + switch (value) { + case SamplerAddressingModeNone: return "None"; + case SamplerAddressingModeClampToEdge: return "ClampToEdge"; + case SamplerAddressingModeClamp: return "Clamp"; + case SamplerAddressingModeRepeat: return "Repeat"; + case SamplerAddressingModeRepeatMirrored: return "RepeatMirrored"; + default: return "Unknown"; + } +} + +inline const char* SamplerFilterModeToString(SamplerFilterMode value) { + switch (value) { + case SamplerFilterModeNearest: return "Nearest"; + case SamplerFilterModeLinear: return "Linear"; + default: return "Unknown"; + } +} + +inline const char* ImageFormatToString(ImageFormat value) { + switch (value) { + case ImageFormatUnknown: return "Unknown"; + case ImageFormatRgba32f: return "Rgba32f"; + case ImageFormatRgba16f: return "Rgba16f"; + case ImageFormatR32f: return "R32f"; + case ImageFormatRgba8: return "Rgba8"; + case ImageFormatRgba8Snorm: return "Rgba8Snorm"; + case ImageFormatRg32f: return "Rg32f"; + case ImageFormatRg16f: return "Rg16f"; + case ImageFormatR11fG11fB10f: return "R11fG11fB10f"; + case ImageFormatR16f: return "R16f"; + case ImageFormatRgba16: return "Rgba16"; + case ImageFormatRgb10A2: return "Rgb10A2"; + case ImageFormatRg16: return "Rg16"; + case ImageFormatRg8: 
return "Rg8"; + case ImageFormatR16: return "R16"; + case ImageFormatR8: return "R8"; + case ImageFormatRgba16Snorm: return "Rgba16Snorm"; + case ImageFormatRg16Snorm: return "Rg16Snorm"; + case ImageFormatRg8Snorm: return "Rg8Snorm"; + case ImageFormatR16Snorm: return "R16Snorm"; + case ImageFormatR8Snorm: return "R8Snorm"; + case ImageFormatRgba32i: return "Rgba32i"; + case ImageFormatRgba16i: return "Rgba16i"; + case ImageFormatRgba8i: return "Rgba8i"; + case ImageFormatR32i: return "R32i"; + case ImageFormatRg32i: return "Rg32i"; + case ImageFormatRg16i: return "Rg16i"; + case ImageFormatRg8i: return "Rg8i"; + case ImageFormatR16i: return "R16i"; + case ImageFormatR8i: return "R8i"; + case ImageFormatRgba32ui: return "Rgba32ui"; + case ImageFormatRgba16ui: return "Rgba16ui"; + case ImageFormatRgba8ui: return "Rgba8ui"; + case ImageFormatR32ui: return "R32ui"; + case ImageFormatRgb10a2ui: return "Rgb10a2ui"; + case ImageFormatRg32ui: return "Rg32ui"; + case ImageFormatRg16ui: return "Rg16ui"; + case ImageFormatRg8ui: return "Rg8ui"; + case ImageFormatR16ui: return "R16ui"; + case ImageFormatR8ui: return "R8ui"; + case ImageFormatR64ui: return "R64ui"; + case ImageFormatR64i: return "R64i"; + default: return "Unknown"; + } +} + +inline const char* ImageChannelOrderToString(ImageChannelOrder value) { + switch (value) { + case ImageChannelOrderR: return "R"; + case ImageChannelOrderA: return "A"; + case ImageChannelOrderRG: return "RG"; + case ImageChannelOrderRA: return "RA"; + case ImageChannelOrderRGB: return "RGB"; + case ImageChannelOrderRGBA: return "RGBA"; + case ImageChannelOrderBGRA: return "BGRA"; + case ImageChannelOrderARGB: return "ARGB"; + case ImageChannelOrderIntensity: return "Intensity"; + case ImageChannelOrderLuminance: return "Luminance"; + case ImageChannelOrderRx: return "Rx"; + case ImageChannelOrderRGx: return "RGx"; + case ImageChannelOrderRGBx: return "RGBx"; + case ImageChannelOrderDepth: return "Depth"; + case ImageChannelOrderDepthStencil: return "DepthStencil"; + case ImageChannelOrdersRGB: return "sRGB"; + case ImageChannelOrdersRGBx: return "sRGBx"; + case ImageChannelOrdersRGBA: return "sRGBA"; + case ImageChannelOrdersBGRA: return "sBGRA"; + case ImageChannelOrderABGR: return "ABGR"; + default: return "Unknown"; + } +} + +inline const char* ImageChannelDataTypeToString(ImageChannelDataType value) { + switch (value) { + case ImageChannelDataTypeSnormInt8: return "SnormInt8"; + case ImageChannelDataTypeSnormInt16: return "SnormInt16"; + case ImageChannelDataTypeUnormInt8: return "UnormInt8"; + case ImageChannelDataTypeUnormInt16: return "UnormInt16"; + case ImageChannelDataTypeUnormShort565: return "UnormShort565"; + case ImageChannelDataTypeUnormShort555: return "UnormShort555"; + case ImageChannelDataTypeUnormInt101010: return "UnormInt101010"; + case ImageChannelDataTypeSignedInt8: return "SignedInt8"; + case ImageChannelDataTypeSignedInt16: return "SignedInt16"; + case ImageChannelDataTypeSignedInt32: return "SignedInt32"; + case ImageChannelDataTypeUnsignedInt8: return "UnsignedInt8"; + case ImageChannelDataTypeUnsignedInt16: return "UnsignedInt16"; + case ImageChannelDataTypeUnsignedInt32: return "UnsignedInt32"; + case ImageChannelDataTypeHalfFloat: return "HalfFloat"; + case ImageChannelDataTypeFloat: return "Float"; + case ImageChannelDataTypeUnormInt24: return "UnormInt24"; + case ImageChannelDataTypeUnormInt101010_2: return "UnormInt101010_2"; + case ImageChannelDataTypeUnormInt10X6EXT: return "UnormInt10X6EXT"; + case 
ImageChannelDataTypeUnsignedIntRaw10EXT: return "UnsignedIntRaw10EXT"; + case ImageChannelDataTypeUnsignedIntRaw12EXT: return "UnsignedIntRaw12EXT"; + case ImageChannelDataTypeUnormInt2_101010EXT: return "UnormInt2_101010EXT"; + case ImageChannelDataTypeUnsignedInt10X6EXT: return "UnsignedInt10X6EXT"; + case ImageChannelDataTypeUnsignedInt12X4EXT: return "UnsignedInt12X4EXT"; + case ImageChannelDataTypeUnsignedInt14X2EXT: return "UnsignedInt14X2EXT"; + case ImageChannelDataTypeUnormInt12X4EXT: return "UnormInt12X4EXT"; + case ImageChannelDataTypeUnormInt14X2EXT: return "UnormInt14X2EXT"; + default: return "Unknown"; + } +} + +inline const char* FPRoundingModeToString(FPRoundingMode value) { + switch (value) { + case FPRoundingModeRTE: return "RTE"; + case FPRoundingModeRTZ: return "RTZ"; + case FPRoundingModeRTP: return "RTP"; + case FPRoundingModeRTN: return "RTN"; + default: return "Unknown"; + } +} + +inline const char* LinkageTypeToString(LinkageType value) { + switch (value) { + case LinkageTypeExport: return "Export"; + case LinkageTypeImport: return "Import"; + case LinkageTypeLinkOnceODR: return "LinkOnceODR"; + default: return "Unknown"; + } +} + +inline const char* AccessQualifierToString(AccessQualifier value) { + switch (value) { + case AccessQualifierReadOnly: return "ReadOnly"; + case AccessQualifierWriteOnly: return "WriteOnly"; + case AccessQualifierReadWrite: return "ReadWrite"; + default: return "Unknown"; + } +} + +inline const char* FunctionParameterAttributeToString(FunctionParameterAttribute value) { + switch (value) { + case FunctionParameterAttributeZext: return "Zext"; + case FunctionParameterAttributeSext: return "Sext"; + case FunctionParameterAttributeByVal: return "ByVal"; + case FunctionParameterAttributeSret: return "Sret"; + case FunctionParameterAttributeNoAlias: return "NoAlias"; + case FunctionParameterAttributeNoCapture: return "NoCapture"; + case FunctionParameterAttributeNoWrite: return "NoWrite"; + case FunctionParameterAttributeNoReadWrite: return "NoReadWrite"; + case FunctionParameterAttributeRuntimeAlignedINTEL: return "RuntimeAlignedINTEL"; + default: return "Unknown"; + } +} + +inline const char* DecorationToString(Decoration value) { + switch (value) { + case DecorationRelaxedPrecision: return "RelaxedPrecision"; + case DecorationSpecId: return "SpecId"; + case DecorationBlock: return "Block"; + case DecorationBufferBlock: return "BufferBlock"; + case DecorationRowMajor: return "RowMajor"; + case DecorationColMajor: return "ColMajor"; + case DecorationArrayStride: return "ArrayStride"; + case DecorationMatrixStride: return "MatrixStride"; + case DecorationGLSLShared: return "GLSLShared"; + case DecorationGLSLPacked: return "GLSLPacked"; + case DecorationCPacked: return "CPacked"; + case DecorationBuiltIn: return "BuiltIn"; + case DecorationNoPerspective: return "NoPerspective"; + case DecorationFlat: return "Flat"; + case DecorationPatch: return "Patch"; + case DecorationCentroid: return "Centroid"; + case DecorationSample: return "Sample"; + case DecorationInvariant: return "Invariant"; + case DecorationRestrict: return "Restrict"; + case DecorationAliased: return "Aliased"; + case DecorationVolatile: return "Volatile"; + case DecorationConstant: return "Constant"; + case DecorationCoherent: return "Coherent"; + case DecorationNonWritable: return "NonWritable"; + case DecorationNonReadable: return "NonReadable"; + case DecorationUniform: return "Uniform"; + case DecorationUniformId: return "UniformId"; + case DecorationSaturatedConversion: 
return "SaturatedConversion"; + case DecorationStream: return "Stream"; + case DecorationLocation: return "Location"; + case DecorationComponent: return "Component"; + case DecorationIndex: return "Index"; + case DecorationBinding: return "Binding"; + case DecorationDescriptorSet: return "DescriptorSet"; + case DecorationOffset: return "Offset"; + case DecorationXfbBuffer: return "XfbBuffer"; + case DecorationXfbStride: return "XfbStride"; + case DecorationFuncParamAttr: return "FuncParamAttr"; + case DecorationFPRoundingMode: return "FPRoundingMode"; + case DecorationFPFastMathMode: return "FPFastMathMode"; + case DecorationLinkageAttributes: return "LinkageAttributes"; + case DecorationNoContraction: return "NoContraction"; + case DecorationInputAttachmentIndex: return "InputAttachmentIndex"; + case DecorationAlignment: return "Alignment"; + case DecorationMaxByteOffset: return "MaxByteOffset"; + case DecorationAlignmentId: return "AlignmentId"; + case DecorationMaxByteOffsetId: return "MaxByteOffsetId"; + case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT"; + case DecorationNoSignedWrap: return "NoSignedWrap"; + case DecorationNoUnsignedWrap: return "NoUnsignedWrap"; + case DecorationWeightTextureQCOM: return "WeightTextureQCOM"; + case DecorationBlockMatchTextureQCOM: return "BlockMatchTextureQCOM"; + case DecorationBlockMatchSamplerQCOM: return "BlockMatchSamplerQCOM"; + case DecorationExplicitInterpAMD: return "ExplicitInterpAMD"; + case DecorationNodeSharesPayloadLimitsWithAMDX: return "NodeSharesPayloadLimitsWithAMDX"; + case DecorationNodeMaxPayloadsAMDX: return "NodeMaxPayloadsAMDX"; + case DecorationTrackFinishWritingAMDX: return "TrackFinishWritingAMDX"; + case DecorationPayloadNodeNameAMDX: return "PayloadNodeNameAMDX"; + case DecorationPayloadNodeBaseIndexAMDX: return "PayloadNodeBaseIndexAMDX"; + case DecorationPayloadNodeSparseArrayAMDX: return "PayloadNodeSparseArrayAMDX"; + case DecorationPayloadNodeArraySizeAMDX: return "PayloadNodeArraySizeAMDX"; + case DecorationPayloadDispatchIndirectAMDX: return "PayloadDispatchIndirectAMDX"; + case DecorationOverrideCoverageNV: return "OverrideCoverageNV"; + case DecorationPassthroughNV: return "PassthroughNV"; + case DecorationViewportRelativeNV: return "ViewportRelativeNV"; + case DecorationSecondaryViewportRelativeNV: return "SecondaryViewportRelativeNV"; + case DecorationPerPrimitiveEXT: return "PerPrimitiveEXT"; + case DecorationPerViewNV: return "PerViewNV"; + case DecorationPerTaskNV: return "PerTaskNV"; + case DecorationPerVertexKHR: return "PerVertexKHR"; + case DecorationNonUniform: return "NonUniform"; + case DecorationRestrictPointer: return "RestrictPointer"; + case DecorationAliasedPointer: return "AliasedPointer"; + case DecorationHitObjectShaderRecordBufferNV: return "HitObjectShaderRecordBufferNV"; + case DecorationBindlessSamplerNV: return "BindlessSamplerNV"; + case DecorationBindlessImageNV: return "BindlessImageNV"; + case DecorationBoundSamplerNV: return "BoundSamplerNV"; + case DecorationBoundImageNV: return "BoundImageNV"; + case DecorationSIMTCallINTEL: return "SIMTCallINTEL"; + case DecorationReferencedIndirectlyINTEL: return "ReferencedIndirectlyINTEL"; + case DecorationClobberINTEL: return "ClobberINTEL"; + case DecorationSideEffectsINTEL: return "SideEffectsINTEL"; + case DecorationVectorComputeVariableINTEL: return "VectorComputeVariableINTEL"; + case DecorationFuncParamIOKindINTEL: return "FuncParamIOKindINTEL"; + case 
DecorationVectorComputeFunctionINTEL: return "VectorComputeFunctionINTEL"; + case DecorationStackCallINTEL: return "StackCallINTEL"; + case DecorationGlobalVariableOffsetINTEL: return "GlobalVariableOffsetINTEL"; + case DecorationCounterBuffer: return "CounterBuffer"; + case DecorationHlslSemanticGOOGLE: return "HlslSemanticGOOGLE"; + case DecorationUserTypeGOOGLE: return "UserTypeGOOGLE"; + case DecorationFunctionRoundingModeINTEL: return "FunctionRoundingModeINTEL"; + case DecorationFunctionDenormModeINTEL: return "FunctionDenormModeINTEL"; + case DecorationRegisterINTEL: return "RegisterINTEL"; + case DecorationMemoryINTEL: return "MemoryINTEL"; + case DecorationNumbanksINTEL: return "NumbanksINTEL"; + case DecorationBankwidthINTEL: return "BankwidthINTEL"; + case DecorationMaxPrivateCopiesINTEL: return "MaxPrivateCopiesINTEL"; + case DecorationSinglepumpINTEL: return "SinglepumpINTEL"; + case DecorationDoublepumpINTEL: return "DoublepumpINTEL"; + case DecorationMaxReplicatesINTEL: return "MaxReplicatesINTEL"; + case DecorationSimpleDualPortINTEL: return "SimpleDualPortINTEL"; + case DecorationMergeINTEL: return "MergeINTEL"; + case DecorationBankBitsINTEL: return "BankBitsINTEL"; + case DecorationForcePow2DepthINTEL: return "ForcePow2DepthINTEL"; + case DecorationStridesizeINTEL: return "StridesizeINTEL"; + case DecorationWordsizeINTEL: return "WordsizeINTEL"; + case DecorationTrueDualPortINTEL: return "TrueDualPortINTEL"; + case DecorationBurstCoalesceINTEL: return "BurstCoalesceINTEL"; + case DecorationCacheSizeINTEL: return "CacheSizeINTEL"; + case DecorationDontStaticallyCoalesceINTEL: return "DontStaticallyCoalesceINTEL"; + case DecorationPrefetchINTEL: return "PrefetchINTEL"; + case DecorationStallEnableINTEL: return "StallEnableINTEL"; + case DecorationFuseLoopsInFunctionINTEL: return "FuseLoopsInFunctionINTEL"; + case DecorationMathOpDSPModeINTEL: return "MathOpDSPModeINTEL"; + case DecorationAliasScopeINTEL: return "AliasScopeINTEL"; + case DecorationNoAliasINTEL: return "NoAliasINTEL"; + case DecorationInitiationIntervalINTEL: return "InitiationIntervalINTEL"; + case DecorationMaxConcurrencyINTEL: return "MaxConcurrencyINTEL"; + case DecorationPipelineEnableINTEL: return "PipelineEnableINTEL"; + case DecorationBufferLocationINTEL: return "BufferLocationINTEL"; + case DecorationIOPipeStorageINTEL: return "IOPipeStorageINTEL"; + case DecorationFunctionFloatingPointModeINTEL: return "FunctionFloatingPointModeINTEL"; + case DecorationSingleElementVectorINTEL: return "SingleElementVectorINTEL"; + case DecorationVectorComputeCallableFunctionINTEL: return "VectorComputeCallableFunctionINTEL"; + case DecorationMediaBlockIOINTEL: return "MediaBlockIOINTEL"; + case DecorationStallFreeINTEL: return "StallFreeINTEL"; + case DecorationFPMaxErrorDecorationINTEL: return "FPMaxErrorDecorationINTEL"; + case DecorationLatencyControlLabelINTEL: return "LatencyControlLabelINTEL"; + case DecorationLatencyControlConstraintINTEL: return "LatencyControlConstraintINTEL"; + case DecorationConduitKernelArgumentINTEL: return "ConduitKernelArgumentINTEL"; + case DecorationRegisterMapKernelArgumentINTEL: return "RegisterMapKernelArgumentINTEL"; + case DecorationMMHostInterfaceAddressWidthINTEL: return "MMHostInterfaceAddressWidthINTEL"; + case DecorationMMHostInterfaceDataWidthINTEL: return "MMHostInterfaceDataWidthINTEL"; + case DecorationMMHostInterfaceLatencyINTEL: return "MMHostInterfaceLatencyINTEL"; + case DecorationMMHostInterfaceReadWriteModeINTEL: return "MMHostInterfaceReadWriteModeINTEL"; + 
case DecorationMMHostInterfaceMaxBurstINTEL: return "MMHostInterfaceMaxBurstINTEL"; + case DecorationMMHostInterfaceWaitRequestINTEL: return "MMHostInterfaceWaitRequestINTEL"; + case DecorationStableKernelArgumentINTEL: return "StableKernelArgumentINTEL"; + case DecorationHostAccessINTEL: return "HostAccessINTEL"; + case DecorationInitModeINTEL: return "InitModeINTEL"; + case DecorationImplementInRegisterMapINTEL: return "ImplementInRegisterMapINTEL"; + case DecorationCacheControlLoadINTEL: return "CacheControlLoadINTEL"; + case DecorationCacheControlStoreINTEL: return "CacheControlStoreINTEL"; + default: return "Unknown"; + } +} + +inline const char* BuiltInToString(BuiltIn value) { + switch (value) { + case BuiltInPosition: return "Position"; + case BuiltInPointSize: return "PointSize"; + case BuiltInClipDistance: return "ClipDistance"; + case BuiltInCullDistance: return "CullDistance"; + case BuiltInVertexId: return "VertexId"; + case BuiltInInstanceId: return "InstanceId"; + case BuiltInPrimitiveId: return "PrimitiveId"; + case BuiltInInvocationId: return "InvocationId"; + case BuiltInLayer: return "Layer"; + case BuiltInViewportIndex: return "ViewportIndex"; + case BuiltInTessLevelOuter: return "TessLevelOuter"; + case BuiltInTessLevelInner: return "TessLevelInner"; + case BuiltInTessCoord: return "TessCoord"; + case BuiltInPatchVertices: return "PatchVertices"; + case BuiltInFragCoord: return "FragCoord"; + case BuiltInPointCoord: return "PointCoord"; + case BuiltInFrontFacing: return "FrontFacing"; + case BuiltInSampleId: return "SampleId"; + case BuiltInSamplePosition: return "SamplePosition"; + case BuiltInSampleMask: return "SampleMask"; + case BuiltInFragDepth: return "FragDepth"; + case BuiltInHelperInvocation: return "HelperInvocation"; + case BuiltInNumWorkgroups: return "NumWorkgroups"; + case BuiltInWorkgroupSize: return "WorkgroupSize"; + case BuiltInWorkgroupId: return "WorkgroupId"; + case BuiltInLocalInvocationId: return "LocalInvocationId"; + case BuiltInGlobalInvocationId: return "GlobalInvocationId"; + case BuiltInLocalInvocationIndex: return "LocalInvocationIndex"; + case BuiltInWorkDim: return "WorkDim"; + case BuiltInGlobalSize: return "GlobalSize"; + case BuiltInEnqueuedWorkgroupSize: return "EnqueuedWorkgroupSize"; + case BuiltInGlobalOffset: return "GlobalOffset"; + case BuiltInGlobalLinearId: return "GlobalLinearId"; + case BuiltInSubgroupSize: return "SubgroupSize"; + case BuiltInSubgroupMaxSize: return "SubgroupMaxSize"; + case BuiltInNumSubgroups: return "NumSubgroups"; + case BuiltInNumEnqueuedSubgroups: return "NumEnqueuedSubgroups"; + case BuiltInSubgroupId: return "SubgroupId"; + case BuiltInSubgroupLocalInvocationId: return "SubgroupLocalInvocationId"; + case BuiltInVertexIndex: return "VertexIndex"; + case BuiltInInstanceIndex: return "InstanceIndex"; + case BuiltInCoreIDARM: return "CoreIDARM"; + case BuiltInCoreCountARM: return "CoreCountARM"; + case BuiltInCoreMaxIDARM: return "CoreMaxIDARM"; + case BuiltInWarpIDARM: return "WarpIDARM"; + case BuiltInWarpMaxIDARM: return "WarpMaxIDARM"; + case BuiltInSubgroupEqMask: return "SubgroupEqMask"; + case BuiltInSubgroupGeMask: return "SubgroupGeMask"; + case BuiltInSubgroupGtMask: return "SubgroupGtMask"; + case BuiltInSubgroupLeMask: return "SubgroupLeMask"; + case BuiltInSubgroupLtMask: return "SubgroupLtMask"; + case BuiltInBaseVertex: return "BaseVertex"; + case BuiltInBaseInstance: return "BaseInstance"; + case BuiltInDrawIndex: return "DrawIndex"; + case BuiltInPrimitiveShadingRateKHR: return 
"PrimitiveShadingRateKHR"; + case BuiltInDeviceIndex: return "DeviceIndex"; + case BuiltInViewIndex: return "ViewIndex"; + case BuiltInShadingRateKHR: return "ShadingRateKHR"; + case BuiltInTileOffsetQCOM: return "TileOffsetQCOM"; + case BuiltInTileDimensionQCOM: return "TileDimensionQCOM"; + case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM"; + case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD"; + case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD"; + case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD"; + case BuiltInBaryCoordSmoothAMD: return "BaryCoordSmoothAMD"; + case BuiltInBaryCoordSmoothCentroidAMD: return "BaryCoordSmoothCentroidAMD"; + case BuiltInBaryCoordSmoothSampleAMD: return "BaryCoordSmoothSampleAMD"; + case BuiltInBaryCoordPullModelAMD: return "BaryCoordPullModelAMD"; + case BuiltInFragStencilRefEXT: return "FragStencilRefEXT"; + case BuiltInRemainingRecursionLevelsAMDX: return "RemainingRecursionLevelsAMDX"; + case BuiltInShaderIndexAMDX: return "ShaderIndexAMDX"; + case BuiltInViewportMaskNV: return "ViewportMaskNV"; + case BuiltInSecondaryPositionNV: return "SecondaryPositionNV"; + case BuiltInSecondaryViewportMaskNV: return "SecondaryViewportMaskNV"; + case BuiltInPositionPerViewNV: return "PositionPerViewNV"; + case BuiltInViewportMaskPerViewNV: return "ViewportMaskPerViewNV"; + case BuiltInFullyCoveredEXT: return "FullyCoveredEXT"; + case BuiltInTaskCountNV: return "TaskCountNV"; + case BuiltInPrimitiveCountNV: return "PrimitiveCountNV"; + case BuiltInPrimitiveIndicesNV: return "PrimitiveIndicesNV"; + case BuiltInClipDistancePerViewNV: return "ClipDistancePerViewNV"; + case BuiltInCullDistancePerViewNV: return "CullDistancePerViewNV"; + case BuiltInLayerPerViewNV: return "LayerPerViewNV"; + case BuiltInMeshViewCountNV: return "MeshViewCountNV"; + case BuiltInMeshViewIndicesNV: return "MeshViewIndicesNV"; + case BuiltInBaryCoordKHR: return "BaryCoordKHR"; + case BuiltInBaryCoordNoPerspKHR: return "BaryCoordNoPerspKHR"; + case BuiltInFragSizeEXT: return "FragSizeEXT"; + case BuiltInFragInvocationCountEXT: return "FragInvocationCountEXT"; + case BuiltInPrimitivePointIndicesEXT: return "PrimitivePointIndicesEXT"; + case BuiltInPrimitiveLineIndicesEXT: return "PrimitiveLineIndicesEXT"; + case BuiltInPrimitiveTriangleIndicesEXT: return "PrimitiveTriangleIndicesEXT"; + case BuiltInCullPrimitiveEXT: return "CullPrimitiveEXT"; + case BuiltInLaunchIdKHR: return "LaunchIdKHR"; + case BuiltInLaunchSizeKHR: return "LaunchSizeKHR"; + case BuiltInWorldRayOriginKHR: return "WorldRayOriginKHR"; + case BuiltInWorldRayDirectionKHR: return "WorldRayDirectionKHR"; + case BuiltInObjectRayOriginKHR: return "ObjectRayOriginKHR"; + case BuiltInObjectRayDirectionKHR: return "ObjectRayDirectionKHR"; + case BuiltInRayTminKHR: return "RayTminKHR"; + case BuiltInRayTmaxKHR: return "RayTmaxKHR"; + case BuiltInInstanceCustomIndexKHR: return "InstanceCustomIndexKHR"; + case BuiltInObjectToWorldKHR: return "ObjectToWorldKHR"; + case BuiltInWorldToObjectKHR: return "WorldToObjectKHR"; + case BuiltInHitTNV: return "HitTNV"; + case BuiltInHitKindKHR: return "HitKindKHR"; + case BuiltInCurrentRayTimeNV: return "CurrentRayTimeNV"; + case BuiltInHitTriangleVertexPositionsKHR: return "HitTriangleVertexPositionsKHR"; + case BuiltInHitMicroTriangleVertexPositionsNV: return "HitMicroTriangleVertexPositionsNV"; + case BuiltInHitMicroTriangleVertexBarycentricsNV: return "HitMicroTriangleVertexBarycentricsNV"; + case BuiltInIncomingRayFlagsKHR: 
return "IncomingRayFlagsKHR"; + case BuiltInRayGeometryIndexKHR: return "RayGeometryIndexKHR"; + case BuiltInHitIsSphereNV: return "HitIsSphereNV"; + case BuiltInHitIsLSSNV: return "HitIsLSSNV"; + case BuiltInHitSpherePositionNV: return "HitSpherePositionNV"; + case BuiltInWarpsPerSMNV: return "WarpsPerSMNV"; + case BuiltInSMCountNV: return "SMCountNV"; + case BuiltInWarpIDNV: return "WarpIDNV"; + case BuiltInSMIDNV: return "SMIDNV"; + case BuiltInHitLSSPositionsNV: return "HitLSSPositionsNV"; + case BuiltInHitKindFrontFacingMicroTriangleNV: return "HitKindFrontFacingMicroTriangleNV"; + case BuiltInHitKindBackFacingMicroTriangleNV: return "HitKindBackFacingMicroTriangleNV"; + case BuiltInHitSphereRadiusNV: return "HitSphereRadiusNV"; + case BuiltInHitLSSRadiiNV: return "HitLSSRadiiNV"; + case BuiltInClusterIDNV: return "ClusterIDNV"; + case BuiltInCullMaskKHR: return "CullMaskKHR"; + default: return "Unknown"; + } +} + +inline const char* ScopeToString(Scope value) { + switch (value) { + case ScopeCrossDevice: return "CrossDevice"; + case ScopeDevice: return "Device"; + case ScopeWorkgroup: return "Workgroup"; + case ScopeSubgroup: return "Subgroup"; + case ScopeInvocation: return "Invocation"; + case ScopeQueueFamily: return "QueueFamily"; + case ScopeShaderCallKHR: return "ShaderCallKHR"; + default: return "Unknown"; + } +} + +inline const char* GroupOperationToString(GroupOperation value) { + switch (value) { + case GroupOperationReduce: return "Reduce"; + case GroupOperationInclusiveScan: return "InclusiveScan"; + case GroupOperationExclusiveScan: return "ExclusiveScan"; + case GroupOperationClusteredReduce: return "ClusteredReduce"; + case GroupOperationPartitionedReduceNV: return "PartitionedReduceNV"; + case GroupOperationPartitionedInclusiveScanNV: return "PartitionedInclusiveScanNV"; + case GroupOperationPartitionedExclusiveScanNV: return "PartitionedExclusiveScanNV"; + default: return "Unknown"; + } +} + +inline const char* KernelEnqueueFlagsToString(KernelEnqueueFlags value) { + switch (value) { + case KernelEnqueueFlagsNoWait: return "NoWait"; + case KernelEnqueueFlagsWaitKernel: return "WaitKernel"; + case KernelEnqueueFlagsWaitWorkGroup: return "WaitWorkGroup"; + default: return "Unknown"; + } +} + +inline const char* CapabilityToString(Capability value) { + switch (value) { + case CapabilityMatrix: return "Matrix"; + case CapabilityShader: return "Shader"; + case CapabilityGeometry: return "Geometry"; + case CapabilityTessellation: return "Tessellation"; + case CapabilityAddresses: return "Addresses"; + case CapabilityLinkage: return "Linkage"; + case CapabilityKernel: return "Kernel"; + case CapabilityVector16: return "Vector16"; + case CapabilityFloat16Buffer: return "Float16Buffer"; + case CapabilityFloat16: return "Float16"; + case CapabilityFloat64: return "Float64"; + case CapabilityInt64: return "Int64"; + case CapabilityInt64Atomics: return "Int64Atomics"; + case CapabilityImageBasic: return "ImageBasic"; + case CapabilityImageReadWrite: return "ImageReadWrite"; + case CapabilityImageMipmap: return "ImageMipmap"; + case CapabilityPipes: return "Pipes"; + case CapabilityGroups: return "Groups"; + case CapabilityDeviceEnqueue: return "DeviceEnqueue"; + case CapabilityLiteralSampler: return "LiteralSampler"; + case CapabilityAtomicStorage: return "AtomicStorage"; + case CapabilityInt16: return "Int16"; + case CapabilityTessellationPointSize: return "TessellationPointSize"; + case CapabilityGeometryPointSize: return "GeometryPointSize"; + case 
CapabilityImageGatherExtended: return "ImageGatherExtended"; + case CapabilityStorageImageMultisample: return "StorageImageMultisample"; + case CapabilityUniformBufferArrayDynamicIndexing: return "UniformBufferArrayDynamicIndexing"; + case CapabilitySampledImageArrayDynamicIndexing: return "SampledImageArrayDynamicIndexing"; + case CapabilityStorageBufferArrayDynamicIndexing: return "StorageBufferArrayDynamicIndexing"; + case CapabilityStorageImageArrayDynamicIndexing: return "StorageImageArrayDynamicIndexing"; + case CapabilityClipDistance: return "ClipDistance"; + case CapabilityCullDistance: return "CullDistance"; + case CapabilityImageCubeArray: return "ImageCubeArray"; + case CapabilitySampleRateShading: return "SampleRateShading"; + case CapabilityImageRect: return "ImageRect"; + case CapabilitySampledRect: return "SampledRect"; + case CapabilityGenericPointer: return "GenericPointer"; + case CapabilityInt8: return "Int8"; + case CapabilityInputAttachment: return "InputAttachment"; + case CapabilitySparseResidency: return "SparseResidency"; + case CapabilityMinLod: return "MinLod"; + case CapabilitySampled1D: return "Sampled1D"; + case CapabilityImage1D: return "Image1D"; + case CapabilitySampledCubeArray: return "SampledCubeArray"; + case CapabilitySampledBuffer: return "SampledBuffer"; + case CapabilityImageBuffer: return "ImageBuffer"; + case CapabilityImageMSArray: return "ImageMSArray"; + case CapabilityStorageImageExtendedFormats: return "StorageImageExtendedFormats"; + case CapabilityImageQuery: return "ImageQuery"; + case CapabilityDerivativeControl: return "DerivativeControl"; + case CapabilityInterpolationFunction: return "InterpolationFunction"; + case CapabilityTransformFeedback: return "TransformFeedback"; + case CapabilityGeometryStreams: return "GeometryStreams"; + case CapabilityStorageImageReadWithoutFormat: return "StorageImageReadWithoutFormat"; + case CapabilityStorageImageWriteWithoutFormat: return "StorageImageWriteWithoutFormat"; + case CapabilityMultiViewport: return "MultiViewport"; + case CapabilitySubgroupDispatch: return "SubgroupDispatch"; + case CapabilityNamedBarrier: return "NamedBarrier"; + case CapabilityPipeStorage: return "PipeStorage"; + case CapabilityGroupNonUniform: return "GroupNonUniform"; + case CapabilityGroupNonUniformVote: return "GroupNonUniformVote"; + case CapabilityGroupNonUniformArithmetic: return "GroupNonUniformArithmetic"; + case CapabilityGroupNonUniformBallot: return "GroupNonUniformBallot"; + case CapabilityGroupNonUniformShuffle: return "GroupNonUniformShuffle"; + case CapabilityGroupNonUniformShuffleRelative: return "GroupNonUniformShuffleRelative"; + case CapabilityGroupNonUniformClustered: return "GroupNonUniformClustered"; + case CapabilityGroupNonUniformQuad: return "GroupNonUniformQuad"; + case CapabilityShaderLayer: return "ShaderLayer"; + case CapabilityShaderViewportIndex: return "ShaderViewportIndex"; + case CapabilityUniformDecoration: return "UniformDecoration"; + case CapabilityCoreBuiltinsARM: return "CoreBuiltinsARM"; + case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT"; + case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT"; + case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT"; + case CapabilityTensorsARM: return "TensorsARM"; + case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM"; + case CapabilityStorageTensorArrayNonUniformIndexingARM: return 
"StorageTensorArrayNonUniformIndexingARM"; + case CapabilityGraphARM: return "GraphARM"; + case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM"; + case CapabilityFloat8EXT: return "Float8EXT"; + case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT"; + case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR"; + case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR"; + case CapabilityDrawParameters: return "DrawParameters"; + case CapabilityWorkgroupMemoryExplicitLayoutKHR: return "WorkgroupMemoryExplicitLayoutKHR"; + case CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR: return "WorkgroupMemoryExplicitLayout8BitAccessKHR"; + case CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR: return "WorkgroupMemoryExplicitLayout16BitAccessKHR"; + case CapabilitySubgroupVoteKHR: return "SubgroupVoteKHR"; + case CapabilityStorageBuffer16BitAccess: return "StorageBuffer16BitAccess"; + case CapabilityStorageUniform16: return "StorageUniform16"; + case CapabilityStoragePushConstant16: return "StoragePushConstant16"; + case CapabilityStorageInputOutput16: return "StorageInputOutput16"; + case CapabilityDeviceGroup: return "DeviceGroup"; + case CapabilityMultiView: return "MultiView"; + case CapabilityVariablePointersStorageBuffer: return "VariablePointersStorageBuffer"; + case CapabilityVariablePointers: return "VariablePointers"; + case CapabilityAtomicStorageOps: return "AtomicStorageOps"; + case CapabilitySampleMaskPostDepthCoverage: return "SampleMaskPostDepthCoverage"; + case CapabilityStorageBuffer8BitAccess: return "StorageBuffer8BitAccess"; + case CapabilityUniformAndStorageBuffer8BitAccess: return "UniformAndStorageBuffer8BitAccess"; + case CapabilityStoragePushConstant8: return "StoragePushConstant8"; + case CapabilityDenormPreserve: return "DenormPreserve"; + case CapabilityDenormFlushToZero: return "DenormFlushToZero"; + case CapabilitySignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve"; + case CapabilityRoundingModeRTE: return "RoundingModeRTE"; + case CapabilityRoundingModeRTZ: return "RoundingModeRTZ"; + case CapabilityRayQueryProvisionalKHR: return "RayQueryProvisionalKHR"; + case CapabilityRayQueryKHR: return "RayQueryKHR"; + case CapabilityUntypedPointersKHR: return "UntypedPointersKHR"; + case CapabilityRayTraversalPrimitiveCullingKHR: return "RayTraversalPrimitiveCullingKHR"; + case CapabilityRayTracingKHR: return "RayTracingKHR"; + case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM"; + case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM"; + case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM"; + case CapabilityTileShadingQCOM: return "TileShadingQCOM"; + case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM"; + case CapabilityFloat16ImageAMD: return "Float16ImageAMD"; + case CapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD"; + case CapabilityFragmentMaskAMD: return "FragmentMaskAMD"; + case CapabilityStencilExportEXT: return "StencilExportEXT"; + case CapabilityImageReadWriteLodAMD: return "ImageReadWriteLodAMD"; + case CapabilityInt64ImageEXT: return "Int64ImageEXT"; + case CapabilityShaderClockKHR: return "ShaderClockKHR"; + case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX"; + case CapabilityQuadControlKHR: return "QuadControlKHR"; + case CapabilityInt4TypeINTEL: return "Int4TypeINTEL"; + case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL"; + case CapabilityBFloat16TypeKHR: 
return "BFloat16TypeKHR"; + case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR"; + case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR"; + case CapabilitySampleMaskOverrideCoverageNV: return "SampleMaskOverrideCoverageNV"; + case CapabilityGeometryShaderPassthroughNV: return "GeometryShaderPassthroughNV"; + case CapabilityShaderViewportIndexLayerEXT: return "ShaderViewportIndexLayerEXT"; + case CapabilityShaderViewportMaskNV: return "ShaderViewportMaskNV"; + case CapabilityShaderStereoViewNV: return "ShaderStereoViewNV"; + case CapabilityPerViewAttributesNV: return "PerViewAttributesNV"; + case CapabilityFragmentFullyCoveredEXT: return "FragmentFullyCoveredEXT"; + case CapabilityMeshShadingNV: return "MeshShadingNV"; + case CapabilityImageFootprintNV: return "ImageFootprintNV"; + case CapabilityMeshShadingEXT: return "MeshShadingEXT"; + case CapabilityFragmentBarycentricKHR: return "FragmentBarycentricKHR"; + case CapabilityComputeDerivativeGroupQuadsKHR: return "ComputeDerivativeGroupQuadsKHR"; + case CapabilityFragmentDensityEXT: return "FragmentDensityEXT"; + case CapabilityGroupNonUniformPartitionedNV: return "GroupNonUniformPartitionedNV"; + case CapabilityShaderNonUniform: return "ShaderNonUniform"; + case CapabilityRuntimeDescriptorArray: return "RuntimeDescriptorArray"; + case CapabilityInputAttachmentArrayDynamicIndexing: return "InputAttachmentArrayDynamicIndexing"; + case CapabilityUniformTexelBufferArrayDynamicIndexing: return "UniformTexelBufferArrayDynamicIndexing"; + case CapabilityStorageTexelBufferArrayDynamicIndexing: return "StorageTexelBufferArrayDynamicIndexing"; + case CapabilityUniformBufferArrayNonUniformIndexing: return "UniformBufferArrayNonUniformIndexing"; + case CapabilitySampledImageArrayNonUniformIndexing: return "SampledImageArrayNonUniformIndexing"; + case CapabilityStorageBufferArrayNonUniformIndexing: return "StorageBufferArrayNonUniformIndexing"; + case CapabilityStorageImageArrayNonUniformIndexing: return "StorageImageArrayNonUniformIndexing"; + case CapabilityInputAttachmentArrayNonUniformIndexing: return "InputAttachmentArrayNonUniformIndexing"; + case CapabilityUniformTexelBufferArrayNonUniformIndexing: return "UniformTexelBufferArrayNonUniformIndexing"; + case CapabilityStorageTexelBufferArrayNonUniformIndexing: return "StorageTexelBufferArrayNonUniformIndexing"; + case CapabilityRayTracingPositionFetchKHR: return "RayTracingPositionFetchKHR"; + case CapabilityRayTracingNV: return "RayTracingNV"; + case CapabilityRayTracingMotionBlurNV: return "RayTracingMotionBlurNV"; + case CapabilityVulkanMemoryModel: return "VulkanMemoryModel"; + case CapabilityVulkanMemoryModelDeviceScope: return "VulkanMemoryModelDeviceScope"; + case CapabilityPhysicalStorageBufferAddresses: return "PhysicalStorageBufferAddresses"; + case CapabilityComputeDerivativeGroupLinearKHR: return "ComputeDerivativeGroupLinearKHR"; + case CapabilityRayTracingProvisionalKHR: return "RayTracingProvisionalKHR"; + case CapabilityCooperativeMatrixNV: return "CooperativeMatrixNV"; + case CapabilityFragmentShaderSampleInterlockEXT: return "FragmentShaderSampleInterlockEXT"; + case CapabilityFragmentShaderShadingRateInterlockEXT: return "FragmentShaderShadingRateInterlockEXT"; + case CapabilityShaderSMBuiltinsNV: return "ShaderSMBuiltinsNV"; + case CapabilityFragmentShaderPixelInterlockEXT: return "FragmentShaderPixelInterlockEXT"; + case CapabilityDemoteToHelperInvocation: return "DemoteToHelperInvocation"; + case CapabilityDisplacementMicromapNV: 
return "DisplacementMicromapNV"; + case CapabilityRayTracingOpacityMicromapEXT: return "RayTracingOpacityMicromapEXT"; + case CapabilityShaderInvocationReorderNV: return "ShaderInvocationReorderNV"; + case CapabilityBindlessTextureNV: return "BindlessTextureNV"; + case CapabilityRayQueryPositionFetchKHR: return "RayQueryPositionFetchKHR"; + case CapabilityCooperativeVectorNV: return "CooperativeVectorNV"; + case CapabilityAtomicFloat16VectorNV: return "AtomicFloat16VectorNV"; + case CapabilityRayTracingDisplacementMicromapNV: return "RayTracingDisplacementMicromapNV"; + case CapabilityRawAccessChainsNV: return "RawAccessChainsNV"; + case CapabilityRayTracingSpheresGeometryNV: return "RayTracingSpheresGeometryNV"; + case CapabilityRayTracingLinearSweptSpheresGeometryNV: return "RayTracingLinearSweptSpheresGeometryNV"; + case CapabilityCooperativeMatrixReductionsNV: return "CooperativeMatrixReductionsNV"; + case CapabilityCooperativeMatrixConversionsNV: return "CooperativeMatrixConversionsNV"; + case CapabilityCooperativeMatrixPerElementOperationsNV: return "CooperativeMatrixPerElementOperationsNV"; + case CapabilityCooperativeMatrixTensorAddressingNV: return "CooperativeMatrixTensorAddressingNV"; + case CapabilityCooperativeMatrixBlockLoadsNV: return "CooperativeMatrixBlockLoadsNV"; + case CapabilityCooperativeVectorTrainingNV: return "CooperativeVectorTrainingNV"; + case CapabilityRayTracingClusterAccelerationStructureNV: return "RayTracingClusterAccelerationStructureNV"; + case CapabilityTensorAddressingNV: return "TensorAddressingNV"; + case CapabilitySubgroupShuffleINTEL: return "SubgroupShuffleINTEL"; + case CapabilitySubgroupBufferBlockIOINTEL: return "SubgroupBufferBlockIOINTEL"; + case CapabilitySubgroupImageBlockIOINTEL: return "SubgroupImageBlockIOINTEL"; + case CapabilitySubgroupImageMediaBlockIOINTEL: return "SubgroupImageMediaBlockIOINTEL"; + case CapabilityRoundToInfinityINTEL: return "RoundToInfinityINTEL"; + case CapabilityFloatingPointModeINTEL: return "FloatingPointModeINTEL"; + case CapabilityIntegerFunctions2INTEL: return "IntegerFunctions2INTEL"; + case CapabilityFunctionPointersINTEL: return "FunctionPointersINTEL"; + case CapabilityIndirectReferencesINTEL: return "IndirectReferencesINTEL"; + case CapabilityAsmINTEL: return "AsmINTEL"; + case CapabilityAtomicFloat32MinMaxEXT: return "AtomicFloat32MinMaxEXT"; + case CapabilityAtomicFloat64MinMaxEXT: return "AtomicFloat64MinMaxEXT"; + case CapabilityAtomicFloat16MinMaxEXT: return "AtomicFloat16MinMaxEXT"; + case CapabilityVectorComputeINTEL: return "VectorComputeINTEL"; + case CapabilityVectorAnyINTEL: return "VectorAnyINTEL"; + case CapabilityExpectAssumeKHR: return "ExpectAssumeKHR"; + case CapabilitySubgroupAvcMotionEstimationINTEL: return "SubgroupAvcMotionEstimationINTEL"; + case CapabilitySubgroupAvcMotionEstimationIntraINTEL: return "SubgroupAvcMotionEstimationIntraINTEL"; + case CapabilitySubgroupAvcMotionEstimationChromaINTEL: return "SubgroupAvcMotionEstimationChromaINTEL"; + case CapabilityVariableLengthArrayINTEL: return "VariableLengthArrayINTEL"; + case CapabilityFunctionFloatControlINTEL: return "FunctionFloatControlINTEL"; + case CapabilityFPGAMemoryAttributesINTEL: return "FPGAMemoryAttributesINTEL"; + case CapabilityFPFastMathModeINTEL: return "FPFastMathModeINTEL"; + case CapabilityArbitraryPrecisionIntegersINTEL: return "ArbitraryPrecisionIntegersINTEL"; + case CapabilityArbitraryPrecisionFloatingPointINTEL: return "ArbitraryPrecisionFloatingPointINTEL"; + case CapabilityUnstructuredLoopControlsINTEL: 
return "UnstructuredLoopControlsINTEL"; + case CapabilityFPGALoopControlsINTEL: return "FPGALoopControlsINTEL"; + case CapabilityKernelAttributesINTEL: return "KernelAttributesINTEL"; + case CapabilityFPGAKernelAttributesINTEL: return "FPGAKernelAttributesINTEL"; + case CapabilityFPGAMemoryAccessesINTEL: return "FPGAMemoryAccessesINTEL"; + case CapabilityFPGAClusterAttributesINTEL: return "FPGAClusterAttributesINTEL"; + case CapabilityLoopFuseINTEL: return "LoopFuseINTEL"; + case CapabilityFPGADSPControlINTEL: return "FPGADSPControlINTEL"; + case CapabilityMemoryAccessAliasingINTEL: return "MemoryAccessAliasingINTEL"; + case CapabilityFPGAInvocationPipeliningAttributesINTEL: return "FPGAInvocationPipeliningAttributesINTEL"; + case CapabilityFPGABufferLocationINTEL: return "FPGABufferLocationINTEL"; + case CapabilityArbitraryPrecisionFixedPointINTEL: return "ArbitraryPrecisionFixedPointINTEL"; + case CapabilityUSMStorageClassesINTEL: return "USMStorageClassesINTEL"; + case CapabilityRuntimeAlignedAttributeINTEL: return "RuntimeAlignedAttributeINTEL"; + case CapabilityIOPipesINTEL: return "IOPipesINTEL"; + case CapabilityBlockingPipesINTEL: return "BlockingPipesINTEL"; + case CapabilityFPGARegINTEL: return "FPGARegINTEL"; + case CapabilityDotProductInputAll: return "DotProductInputAll"; + case CapabilityDotProductInput4x8Bit: return "DotProductInput4x8Bit"; + case CapabilityDotProductInput4x8BitPacked: return "DotProductInput4x8BitPacked"; + case CapabilityDotProduct: return "DotProduct"; + case CapabilityRayCullMaskKHR: return "RayCullMaskKHR"; + case CapabilityCooperativeMatrixKHR: return "CooperativeMatrixKHR"; + case CapabilityReplicatedCompositesEXT: return "ReplicatedCompositesEXT"; + case CapabilityBitInstructions: return "BitInstructions"; + case CapabilityGroupNonUniformRotateKHR: return "GroupNonUniformRotateKHR"; + case CapabilityFloatControls2: return "FloatControls2"; + case CapabilityAtomicFloat32AddEXT: return "AtomicFloat32AddEXT"; + case CapabilityAtomicFloat64AddEXT: return "AtomicFloat64AddEXT"; + case CapabilityLongCompositesINTEL: return "LongCompositesINTEL"; + case CapabilityOptNoneEXT: return "OptNoneEXT"; + case CapabilityAtomicFloat16AddEXT: return "AtomicFloat16AddEXT"; + case CapabilityDebugInfoModuleINTEL: return "DebugInfoModuleINTEL"; + case CapabilityBFloat16ConversionINTEL: return "BFloat16ConversionINTEL"; + case CapabilitySplitBarrierINTEL: return "SplitBarrierINTEL"; + case CapabilityArithmeticFenceEXT: return "ArithmeticFenceEXT"; + case CapabilityFPGAClusterAttributesV2INTEL: return "FPGAClusterAttributesV2INTEL"; + case CapabilityFPGAKernelAttributesv2INTEL: return "FPGAKernelAttributesv2INTEL"; + case CapabilityTaskSequenceINTEL: return "TaskSequenceINTEL"; + case CapabilityFPMaxErrorINTEL: return "FPMaxErrorINTEL"; + case CapabilityFPGALatencyControlINTEL: return "FPGALatencyControlINTEL"; + case CapabilityFPGAArgumentInterfacesINTEL: return "FPGAArgumentInterfacesINTEL"; + case CapabilityGlobalVariableHostAccessINTEL: return "GlobalVariableHostAccessINTEL"; + case CapabilityGlobalVariableFPGADecorationsINTEL: return "GlobalVariableFPGADecorationsINTEL"; + case CapabilitySubgroupBufferPrefetchINTEL: return "SubgroupBufferPrefetchINTEL"; + case CapabilitySubgroup2DBlockIOINTEL: return "Subgroup2DBlockIOINTEL"; + case CapabilitySubgroup2DBlockTransformINTEL: return "Subgroup2DBlockTransformINTEL"; + case CapabilitySubgroup2DBlockTransposeINTEL: return "Subgroup2DBlockTransposeINTEL"; + case CapabilitySubgroupMatrixMultiplyAccumulateINTEL: return 
"SubgroupMatrixMultiplyAccumulateINTEL"; + case CapabilityTernaryBitwiseFunctionINTEL: return "TernaryBitwiseFunctionINTEL"; + case CapabilityGroupUniformArithmeticKHR: return "GroupUniformArithmeticKHR"; + case CapabilityTensorFloat32RoundingINTEL: return "TensorFloat32RoundingINTEL"; + case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL"; + case CapabilityCacheControlsINTEL: return "CacheControlsINTEL"; + case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL"; + case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL"; + default: return "Unknown"; + } +} + +inline const char* RayQueryIntersectionToString(RayQueryIntersection value) { + switch (value) { + case RayQueryIntersectionRayQueryCandidateIntersectionKHR: return "RayQueryCandidateIntersectionKHR"; + case RayQueryIntersectionRayQueryCommittedIntersectionKHR: return "RayQueryCommittedIntersectionKHR"; + default: return "Unknown"; + } +} + +inline const char* RayQueryCommittedIntersectionTypeToString(RayQueryCommittedIntersectionType value) { + switch (value) { + case RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR: return "RayQueryCommittedIntersectionNoneKHR"; + case RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR: return "RayQueryCommittedIntersectionTriangleKHR"; + case RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR: return "RayQueryCommittedIntersectionGeneratedKHR"; + default: return "Unknown"; + } +} + +inline const char* RayQueryCandidateIntersectionTypeToString(RayQueryCandidateIntersectionType value) { + switch (value) { + case RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR: return "RayQueryCandidateIntersectionTriangleKHR"; + case RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR: return "RayQueryCandidateIntersectionAABBKHR"; + default: return "Unknown"; + } +} + +inline const char* FPDenormModeToString(FPDenormMode value) { + switch (value) { + case FPDenormModePreserve: return "Preserve"; + case FPDenormModeFlushToZero: return "FlushToZero"; + default: return "Unknown"; + } +} + +inline const char* FPOperationModeToString(FPOperationMode value) { + switch (value) { + case FPOperationModeIEEE: return "IEEE"; + case FPOperationModeALT: return "ALT"; + default: return "Unknown"; + } +} + +inline const char* QuantizationModesToString(QuantizationModes value) { + switch (value) { + case QuantizationModesTRN: return "TRN"; + case QuantizationModesTRN_ZERO: return "TRN_ZERO"; + case QuantizationModesRND: return "RND"; + case QuantizationModesRND_ZERO: return "RND_ZERO"; + case QuantizationModesRND_INF: return "RND_INF"; + case QuantizationModesRND_MIN_INF: return "RND_MIN_INF"; + case QuantizationModesRND_CONV: return "RND_CONV"; + case QuantizationModesRND_CONV_ODD: return "RND_CONV_ODD"; + default: return "Unknown"; + } +} + +inline const char* OverflowModesToString(OverflowModes value) { + switch (value) { + case OverflowModesWRAP: return "WRAP"; + case OverflowModesSAT: return "SAT"; + case OverflowModesSAT_ZERO: return "SAT_ZERO"; + case OverflowModesSAT_SYM: return "SAT_SYM"; + default: return "Unknown"; + } +} + +inline const char* PackedVectorFormatToString(PackedVectorFormat value) { + switch (value) { + case PackedVectorFormatPackedVectorFormat4x8Bit: return "PackedVectorFormat4x8Bit"; + default: return "Unknown"; + } +} + +inline const char* CooperativeMatrixLayoutToString(CooperativeMatrixLayout value) { + switch (value) { + case 
CooperativeMatrixLayoutRowMajorKHR: return "RowMajorKHR"; + case CooperativeMatrixLayoutColumnMajorKHR: return "ColumnMajorKHR"; + case CooperativeMatrixLayoutRowBlockedInterleavedARM: return "RowBlockedInterleavedARM"; + case CooperativeMatrixLayoutColumnBlockedInterleavedARM: return "ColumnBlockedInterleavedARM"; + default: return "Unknown"; + } +} + +inline const char* CooperativeMatrixUseToString(CooperativeMatrixUse value) { + switch (value) { + case CooperativeMatrixUseMatrixAKHR: return "MatrixAKHR"; + case CooperativeMatrixUseMatrixBKHR: return "MatrixBKHR"; + case CooperativeMatrixUseMatrixAccumulatorKHR: return "MatrixAccumulatorKHR"; + default: return "Unknown"; + } +} + +inline const char* TensorClampModeToString(TensorClampMode value) { + switch (value) { + case TensorClampModeUndefined: return "Undefined"; + case TensorClampModeConstant: return "Constant"; + case TensorClampModeClampToEdge: return "ClampToEdge"; + case TensorClampModeRepeat: return "Repeat"; + case TensorClampModeRepeatMirrored: return "RepeatMirrored"; + default: return "Unknown"; + } +} + +inline const char* InitializationModeQualifierToString(InitializationModeQualifier value) { + switch (value) { + case InitializationModeQualifierInitOnDeviceReprogramINTEL: return "InitOnDeviceReprogramINTEL"; + case InitializationModeQualifierInitOnDeviceResetINTEL: return "InitOnDeviceResetINTEL"; + default: return "Unknown"; + } +} + +inline const char* HostAccessQualifierToString(HostAccessQualifier value) { + switch (value) { + case HostAccessQualifierNoneINTEL: return "NoneINTEL"; + case HostAccessQualifierReadINTEL: return "ReadINTEL"; + case HostAccessQualifierWriteINTEL: return "WriteINTEL"; + case HostAccessQualifierReadWriteINTEL: return "ReadWriteINTEL"; + default: return "Unknown"; + } +} + +inline const char* LoadCacheControlToString(LoadCacheControl value) { + switch (value) { + case LoadCacheControlUncachedINTEL: return "UncachedINTEL"; + case LoadCacheControlCachedINTEL: return "CachedINTEL"; + case LoadCacheControlStreamingINTEL: return "StreamingINTEL"; + case LoadCacheControlInvalidateAfterReadINTEL: return "InvalidateAfterReadINTEL"; + case LoadCacheControlConstCachedINTEL: return "ConstCachedINTEL"; + default: return "Unknown"; + } +} + +inline const char* StoreCacheControlToString(StoreCacheControl value) { + switch (value) { + case StoreCacheControlUncachedINTEL: return "UncachedINTEL"; + case StoreCacheControlWriteThroughINTEL: return "WriteThroughINTEL"; + case StoreCacheControlWriteBackINTEL: return "WriteBackINTEL"; + case StoreCacheControlStreamingINTEL: return "StreamingINTEL"; + default: return "Unknown"; + } +} + +inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfRegisters value) { + switch (value) { + case NamedMaximumNumberOfRegistersAutoINTEL: return "AutoINTEL"; + default: return "Unknown"; + } +} + +inline const char* FPEncodingToString(FPEncoding value) { + switch (value) { + case FPEncodingBFloat16KHR: return "BFloat16KHR"; + case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT"; + case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT"; + default: return "Unknown"; + } +} + +inline const char* CooperativeVectorMatrixLayoutToString(CooperativeVectorMatrixLayout value) { + switch (value) { + case CooperativeVectorMatrixLayoutRowMajorNV: return "RowMajorNV"; + case CooperativeVectorMatrixLayoutColumnMajorNV: return "ColumnMajorNV"; + case CooperativeVectorMatrixLayoutInferencingOptimalNV: return "InferencingOptimalNV"; + case 
CooperativeVectorMatrixLayoutTrainingOptimalNV: return "TrainingOptimalNV"; + default: return "Unknown"; + } +} + +inline const char* ComponentTypeToString(ComponentType value) { + switch (value) { + case ComponentTypeFloat16NV: return "Float16NV"; + case ComponentTypeFloat32NV: return "Float32NV"; + case ComponentTypeFloat64NV: return "Float64NV"; + case ComponentTypeSignedInt8NV: return "SignedInt8NV"; + case ComponentTypeSignedInt16NV: return "SignedInt16NV"; + case ComponentTypeSignedInt32NV: return "SignedInt32NV"; + case ComponentTypeSignedInt64NV: return "SignedInt64NV"; + case ComponentTypeUnsignedInt8NV: return "UnsignedInt8NV"; + case ComponentTypeUnsignedInt16NV: return "UnsignedInt16NV"; + case ComponentTypeUnsignedInt32NV: return "UnsignedInt32NV"; + case ComponentTypeUnsignedInt64NV: return "UnsignedInt64NV"; + case ComponentTypeSignedInt8PackedNV: return "SignedInt8PackedNV"; + case ComponentTypeUnsignedInt8PackedNV: return "UnsignedInt8PackedNV"; + case ComponentTypeFloatE4M3NV: return "FloatE4M3NV"; + case ComponentTypeFloatE5M2NV: return "FloatE5M2NV"; + default: return "Unknown"; + } +} + +inline const char* OpToString(Op value) { + switch (value) { + case OpNop: return "OpNop"; + case OpUndef: return "OpUndef"; + case OpSourceContinued: return "OpSourceContinued"; + case OpSource: return "OpSource"; + case OpSourceExtension: return "OpSourceExtension"; + case OpName: return "OpName"; + case OpMemberName: return "OpMemberName"; + case OpString: return "OpString"; + case OpLine: return "OpLine"; + case OpExtension: return "OpExtension"; + case OpExtInstImport: return "OpExtInstImport"; + case OpExtInst: return "OpExtInst"; + case OpMemoryModel: return "OpMemoryModel"; + case OpEntryPoint: return "OpEntryPoint"; + case OpExecutionMode: return "OpExecutionMode"; + case OpCapability: return "OpCapability"; + case OpTypeVoid: return "OpTypeVoid"; + case OpTypeBool: return "OpTypeBool"; + case OpTypeInt: return "OpTypeInt"; + case OpTypeFloat: return "OpTypeFloat"; + case OpTypeVector: return "OpTypeVector"; + case OpTypeMatrix: return "OpTypeMatrix"; + case OpTypeImage: return "OpTypeImage"; + case OpTypeSampler: return "OpTypeSampler"; + case OpTypeSampledImage: return "OpTypeSampledImage"; + case OpTypeArray: return "OpTypeArray"; + case OpTypeRuntimeArray: return "OpTypeRuntimeArray"; + case OpTypeStruct: return "OpTypeStruct"; + case OpTypeOpaque: return "OpTypeOpaque"; + case OpTypePointer: return "OpTypePointer"; + case OpTypeFunction: return "OpTypeFunction"; + case OpTypeEvent: return "OpTypeEvent"; + case OpTypeDeviceEvent: return "OpTypeDeviceEvent"; + case OpTypeReserveId: return "OpTypeReserveId"; + case OpTypeQueue: return "OpTypeQueue"; + case OpTypePipe: return "OpTypePipe"; + case OpTypeForwardPointer: return "OpTypeForwardPointer"; + case OpConstantTrue: return "OpConstantTrue"; + case OpConstantFalse: return "OpConstantFalse"; + case OpConstant: return "OpConstant"; + case OpConstantComposite: return "OpConstantComposite"; + case OpConstantSampler: return "OpConstantSampler"; + case OpConstantNull: return "OpConstantNull"; + case OpSpecConstantTrue: return "OpSpecConstantTrue"; + case OpSpecConstantFalse: return "OpSpecConstantFalse"; + case OpSpecConstant: return "OpSpecConstant"; + case OpSpecConstantComposite: return "OpSpecConstantComposite"; + case OpSpecConstantOp: return "OpSpecConstantOp"; + case OpFunction: return "OpFunction"; + case OpFunctionParameter: return "OpFunctionParameter"; + case OpFunctionEnd: return "OpFunctionEnd"; + case 
OpFunctionCall: return "OpFunctionCall"; + case OpVariable: return "OpVariable"; + case OpImageTexelPointer: return "OpImageTexelPointer"; + case OpLoad: return "OpLoad"; + case OpStore: return "OpStore"; + case OpCopyMemory: return "OpCopyMemory"; + case OpCopyMemorySized: return "OpCopyMemorySized"; + case OpAccessChain: return "OpAccessChain"; + case OpInBoundsAccessChain: return "OpInBoundsAccessChain"; + case OpPtrAccessChain: return "OpPtrAccessChain"; + case OpArrayLength: return "OpArrayLength"; + case OpGenericPtrMemSemantics: return "OpGenericPtrMemSemantics"; + case OpInBoundsPtrAccessChain: return "OpInBoundsPtrAccessChain"; + case OpDecorate: return "OpDecorate"; + case OpMemberDecorate: return "OpMemberDecorate"; + case OpDecorationGroup: return "OpDecorationGroup"; + case OpGroupDecorate: return "OpGroupDecorate"; + case OpGroupMemberDecorate: return "OpGroupMemberDecorate"; + case OpVectorExtractDynamic: return "OpVectorExtractDynamic"; + case OpVectorInsertDynamic: return "OpVectorInsertDynamic"; + case OpVectorShuffle: return "OpVectorShuffle"; + case OpCompositeConstruct: return "OpCompositeConstruct"; + case OpCompositeExtract: return "OpCompositeExtract"; + case OpCompositeInsert: return "OpCompositeInsert"; + case OpCopyObject: return "OpCopyObject"; + case OpTranspose: return "OpTranspose"; + case OpSampledImage: return "OpSampledImage"; + case OpImageSampleImplicitLod: return "OpImageSampleImplicitLod"; + case OpImageSampleExplicitLod: return "OpImageSampleExplicitLod"; + case OpImageSampleDrefImplicitLod: return "OpImageSampleDrefImplicitLod"; + case OpImageSampleDrefExplicitLod: return "OpImageSampleDrefExplicitLod"; + case OpImageSampleProjImplicitLod: return "OpImageSampleProjImplicitLod"; + case OpImageSampleProjExplicitLod: return "OpImageSampleProjExplicitLod"; + case OpImageSampleProjDrefImplicitLod: return "OpImageSampleProjDrefImplicitLod"; + case OpImageSampleProjDrefExplicitLod: return "OpImageSampleProjDrefExplicitLod"; + case OpImageFetch: return "OpImageFetch"; + case OpImageGather: return "OpImageGather"; + case OpImageDrefGather: return "OpImageDrefGather"; + case OpImageRead: return "OpImageRead"; + case OpImageWrite: return "OpImageWrite"; + case OpImage: return "OpImage"; + case OpImageQueryFormat: return "OpImageQueryFormat"; + case OpImageQueryOrder: return "OpImageQueryOrder"; + case OpImageQuerySizeLod: return "OpImageQuerySizeLod"; + case OpImageQuerySize: return "OpImageQuerySize"; + case OpImageQueryLod: return "OpImageQueryLod"; + case OpImageQueryLevels: return "OpImageQueryLevels"; + case OpImageQuerySamples: return "OpImageQuerySamples"; + case OpConvertFToU: return "OpConvertFToU"; + case OpConvertFToS: return "OpConvertFToS"; + case OpConvertSToF: return "OpConvertSToF"; + case OpConvertUToF: return "OpConvertUToF"; + case OpUConvert: return "OpUConvert"; + case OpSConvert: return "OpSConvert"; + case OpFConvert: return "OpFConvert"; + case OpQuantizeToF16: return "OpQuantizeToF16"; + case OpConvertPtrToU: return "OpConvertPtrToU"; + case OpSatConvertSToU: return "OpSatConvertSToU"; + case OpSatConvertUToS: return "OpSatConvertUToS"; + case OpConvertUToPtr: return "OpConvertUToPtr"; + case OpPtrCastToGeneric: return "OpPtrCastToGeneric"; + case OpGenericCastToPtr: return "OpGenericCastToPtr"; + case OpGenericCastToPtrExplicit: return "OpGenericCastToPtrExplicit"; + case OpBitcast: return "OpBitcast"; + case OpSNegate: return "OpSNegate"; + case OpFNegate: return "OpFNegate"; + case OpIAdd: return "OpIAdd"; + case OpFAdd: return 
"OpFAdd"; + case OpISub: return "OpISub"; + case OpFSub: return "OpFSub"; + case OpIMul: return "OpIMul"; + case OpFMul: return "OpFMul"; + case OpUDiv: return "OpUDiv"; + case OpSDiv: return "OpSDiv"; + case OpFDiv: return "OpFDiv"; + case OpUMod: return "OpUMod"; + case OpSRem: return "OpSRem"; + case OpSMod: return "OpSMod"; + case OpFRem: return "OpFRem"; + case OpFMod: return "OpFMod"; + case OpVectorTimesScalar: return "OpVectorTimesScalar"; + case OpMatrixTimesScalar: return "OpMatrixTimesScalar"; + case OpVectorTimesMatrix: return "OpVectorTimesMatrix"; + case OpMatrixTimesVector: return "OpMatrixTimesVector"; + case OpMatrixTimesMatrix: return "OpMatrixTimesMatrix"; + case OpOuterProduct: return "OpOuterProduct"; + case OpDot: return "OpDot"; + case OpIAddCarry: return "OpIAddCarry"; + case OpISubBorrow: return "OpISubBorrow"; + case OpUMulExtended: return "OpUMulExtended"; + case OpSMulExtended: return "OpSMulExtended"; + case OpAny: return "OpAny"; + case OpAll: return "OpAll"; + case OpIsNan: return "OpIsNan"; + case OpIsInf: return "OpIsInf"; + case OpIsFinite: return "OpIsFinite"; + case OpIsNormal: return "OpIsNormal"; + case OpSignBitSet: return "OpSignBitSet"; + case OpLessOrGreater: return "OpLessOrGreater"; + case OpOrdered: return "OpOrdered"; + case OpUnordered: return "OpUnordered"; + case OpLogicalEqual: return "OpLogicalEqual"; + case OpLogicalNotEqual: return "OpLogicalNotEqual"; + case OpLogicalOr: return "OpLogicalOr"; + case OpLogicalAnd: return "OpLogicalAnd"; + case OpLogicalNot: return "OpLogicalNot"; + case OpSelect: return "OpSelect"; + case OpIEqual: return "OpIEqual"; + case OpINotEqual: return "OpINotEqual"; + case OpUGreaterThan: return "OpUGreaterThan"; + case OpSGreaterThan: return "OpSGreaterThan"; + case OpUGreaterThanEqual: return "OpUGreaterThanEqual"; + case OpSGreaterThanEqual: return "OpSGreaterThanEqual"; + case OpULessThan: return "OpULessThan"; + case OpSLessThan: return "OpSLessThan"; + case OpULessThanEqual: return "OpULessThanEqual"; + case OpSLessThanEqual: return "OpSLessThanEqual"; + case OpFOrdEqual: return "OpFOrdEqual"; + case OpFUnordEqual: return "OpFUnordEqual"; + case OpFOrdNotEqual: return "OpFOrdNotEqual"; + case OpFUnordNotEqual: return "OpFUnordNotEqual"; + case OpFOrdLessThan: return "OpFOrdLessThan"; + case OpFUnordLessThan: return "OpFUnordLessThan"; + case OpFOrdGreaterThan: return "OpFOrdGreaterThan"; + case OpFUnordGreaterThan: return "OpFUnordGreaterThan"; + case OpFOrdLessThanEqual: return "OpFOrdLessThanEqual"; + case OpFUnordLessThanEqual: return "OpFUnordLessThanEqual"; + case OpFOrdGreaterThanEqual: return "OpFOrdGreaterThanEqual"; + case OpFUnordGreaterThanEqual: return "OpFUnordGreaterThanEqual"; + case OpShiftRightLogical: return "OpShiftRightLogical"; + case OpShiftRightArithmetic: return "OpShiftRightArithmetic"; + case OpShiftLeftLogical: return "OpShiftLeftLogical"; + case OpBitwiseOr: return "OpBitwiseOr"; + case OpBitwiseXor: return "OpBitwiseXor"; + case OpBitwiseAnd: return "OpBitwiseAnd"; + case OpNot: return "OpNot"; + case OpBitFieldInsert: return "OpBitFieldInsert"; + case OpBitFieldSExtract: return "OpBitFieldSExtract"; + case OpBitFieldUExtract: return "OpBitFieldUExtract"; + case OpBitReverse: return "OpBitReverse"; + case OpBitCount: return "OpBitCount"; + case OpDPdx: return "OpDPdx"; + case OpDPdy: return "OpDPdy"; + case OpFwidth: return "OpFwidth"; + case OpDPdxFine: return "OpDPdxFine"; + case OpDPdyFine: return "OpDPdyFine"; + case OpFwidthFine: return "OpFwidthFine"; + case OpDPdxCoarse: 
return "OpDPdxCoarse"; + case OpDPdyCoarse: return "OpDPdyCoarse"; + case OpFwidthCoarse: return "OpFwidthCoarse"; + case OpEmitVertex: return "OpEmitVertex"; + case OpEndPrimitive: return "OpEndPrimitive"; + case OpEmitStreamVertex: return "OpEmitStreamVertex"; + case OpEndStreamPrimitive: return "OpEndStreamPrimitive"; + case OpControlBarrier: return "OpControlBarrier"; + case OpMemoryBarrier: return "OpMemoryBarrier"; + case OpAtomicLoad: return "OpAtomicLoad"; + case OpAtomicStore: return "OpAtomicStore"; + case OpAtomicExchange: return "OpAtomicExchange"; + case OpAtomicCompareExchange: return "OpAtomicCompareExchange"; + case OpAtomicCompareExchangeWeak: return "OpAtomicCompareExchangeWeak"; + case OpAtomicIIncrement: return "OpAtomicIIncrement"; + case OpAtomicIDecrement: return "OpAtomicIDecrement"; + case OpAtomicIAdd: return "OpAtomicIAdd"; + case OpAtomicISub: return "OpAtomicISub"; + case OpAtomicSMin: return "OpAtomicSMin"; + case OpAtomicUMin: return "OpAtomicUMin"; + case OpAtomicSMax: return "OpAtomicSMax"; + case OpAtomicUMax: return "OpAtomicUMax"; + case OpAtomicAnd: return "OpAtomicAnd"; + case OpAtomicOr: return "OpAtomicOr"; + case OpAtomicXor: return "OpAtomicXor"; + case OpPhi: return "OpPhi"; + case OpLoopMerge: return "OpLoopMerge"; + case OpSelectionMerge: return "OpSelectionMerge"; + case OpLabel: return "OpLabel"; + case OpBranch: return "OpBranch"; + case OpBranchConditional: return "OpBranchConditional"; + case OpSwitch: return "OpSwitch"; + case OpKill: return "OpKill"; + case OpReturn: return "OpReturn"; + case OpReturnValue: return "OpReturnValue"; + case OpUnreachable: return "OpUnreachable"; + case OpLifetimeStart: return "OpLifetimeStart"; + case OpLifetimeStop: return "OpLifetimeStop"; + case OpGroupAsyncCopy: return "OpGroupAsyncCopy"; + case OpGroupWaitEvents: return "OpGroupWaitEvents"; + case OpGroupAll: return "OpGroupAll"; + case OpGroupAny: return "OpGroupAny"; + case OpGroupBroadcast: return "OpGroupBroadcast"; + case OpGroupIAdd: return "OpGroupIAdd"; + case OpGroupFAdd: return "OpGroupFAdd"; + case OpGroupFMin: return "OpGroupFMin"; + case OpGroupUMin: return "OpGroupUMin"; + case OpGroupSMin: return "OpGroupSMin"; + case OpGroupFMax: return "OpGroupFMax"; + case OpGroupUMax: return "OpGroupUMax"; + case OpGroupSMax: return "OpGroupSMax"; + case OpReadPipe: return "OpReadPipe"; + case OpWritePipe: return "OpWritePipe"; + case OpReservedReadPipe: return "OpReservedReadPipe"; + case OpReservedWritePipe: return "OpReservedWritePipe"; + case OpReserveReadPipePackets: return "OpReserveReadPipePackets"; + case OpReserveWritePipePackets: return "OpReserveWritePipePackets"; + case OpCommitReadPipe: return "OpCommitReadPipe"; + case OpCommitWritePipe: return "OpCommitWritePipe"; + case OpIsValidReserveId: return "OpIsValidReserveId"; + case OpGetNumPipePackets: return "OpGetNumPipePackets"; + case OpGetMaxPipePackets: return "OpGetMaxPipePackets"; + case OpGroupReserveReadPipePackets: return "OpGroupReserveReadPipePackets"; + case OpGroupReserveWritePipePackets: return "OpGroupReserveWritePipePackets"; + case OpGroupCommitReadPipe: return "OpGroupCommitReadPipe"; + case OpGroupCommitWritePipe: return "OpGroupCommitWritePipe"; + case OpEnqueueMarker: return "OpEnqueueMarker"; + case OpEnqueueKernel: return "OpEnqueueKernel"; + case OpGetKernelNDrangeSubGroupCount: return "OpGetKernelNDrangeSubGroupCount"; + case OpGetKernelNDrangeMaxSubGroupSize: return "OpGetKernelNDrangeMaxSubGroupSize"; + case OpGetKernelWorkGroupSize: return 
"OpGetKernelWorkGroupSize"; + case OpGetKernelPreferredWorkGroupSizeMultiple: return "OpGetKernelPreferredWorkGroupSizeMultiple"; + case OpRetainEvent: return "OpRetainEvent"; + case OpReleaseEvent: return "OpReleaseEvent"; + case OpCreateUserEvent: return "OpCreateUserEvent"; + case OpIsValidEvent: return "OpIsValidEvent"; + case OpSetUserEventStatus: return "OpSetUserEventStatus"; + case OpCaptureEventProfilingInfo: return "OpCaptureEventProfilingInfo"; + case OpGetDefaultQueue: return "OpGetDefaultQueue"; + case OpBuildNDRange: return "OpBuildNDRange"; + case OpImageSparseSampleImplicitLod: return "OpImageSparseSampleImplicitLod"; + case OpImageSparseSampleExplicitLod: return "OpImageSparseSampleExplicitLod"; + case OpImageSparseSampleDrefImplicitLod: return "OpImageSparseSampleDrefImplicitLod"; + case OpImageSparseSampleDrefExplicitLod: return "OpImageSparseSampleDrefExplicitLod"; + case OpImageSparseSampleProjImplicitLod: return "OpImageSparseSampleProjImplicitLod"; + case OpImageSparseSampleProjExplicitLod: return "OpImageSparseSampleProjExplicitLod"; + case OpImageSparseSampleProjDrefImplicitLod: return "OpImageSparseSampleProjDrefImplicitLod"; + case OpImageSparseSampleProjDrefExplicitLod: return "OpImageSparseSampleProjDrefExplicitLod"; + case OpImageSparseFetch: return "OpImageSparseFetch"; + case OpImageSparseGather: return "OpImageSparseGather"; + case OpImageSparseDrefGather: return "OpImageSparseDrefGather"; + case OpImageSparseTexelsResident: return "OpImageSparseTexelsResident"; + case OpNoLine: return "OpNoLine"; + case OpAtomicFlagTestAndSet: return "OpAtomicFlagTestAndSet"; + case OpAtomicFlagClear: return "OpAtomicFlagClear"; + case OpImageSparseRead: return "OpImageSparseRead"; + case OpSizeOf: return "OpSizeOf"; + case OpTypePipeStorage: return "OpTypePipeStorage"; + case OpConstantPipeStorage: return "OpConstantPipeStorage"; + case OpCreatePipeFromPipeStorage: return "OpCreatePipeFromPipeStorage"; + case OpGetKernelLocalSizeForSubgroupCount: return "OpGetKernelLocalSizeForSubgroupCount"; + case OpGetKernelMaxNumSubgroups: return "OpGetKernelMaxNumSubgroups"; + case OpTypeNamedBarrier: return "OpTypeNamedBarrier"; + case OpNamedBarrierInitialize: return "OpNamedBarrierInitialize"; + case OpMemoryNamedBarrier: return "OpMemoryNamedBarrier"; + case OpModuleProcessed: return "OpModuleProcessed"; + case OpExecutionModeId: return "OpExecutionModeId"; + case OpDecorateId: return "OpDecorateId"; + case OpGroupNonUniformElect: return "OpGroupNonUniformElect"; + case OpGroupNonUniformAll: return "OpGroupNonUniformAll"; + case OpGroupNonUniformAny: return "OpGroupNonUniformAny"; + case OpGroupNonUniformAllEqual: return "OpGroupNonUniformAllEqual"; + case OpGroupNonUniformBroadcast: return "OpGroupNonUniformBroadcast"; + case OpGroupNonUniformBroadcastFirst: return "OpGroupNonUniformBroadcastFirst"; + case OpGroupNonUniformBallot: return "OpGroupNonUniformBallot"; + case OpGroupNonUniformInverseBallot: return "OpGroupNonUniformInverseBallot"; + case OpGroupNonUniformBallotBitExtract: return "OpGroupNonUniformBallotBitExtract"; + case OpGroupNonUniformBallotBitCount: return "OpGroupNonUniformBallotBitCount"; + case OpGroupNonUniformBallotFindLSB: return "OpGroupNonUniformBallotFindLSB"; + case OpGroupNonUniformBallotFindMSB: return "OpGroupNonUniformBallotFindMSB"; + case OpGroupNonUniformShuffle: return "OpGroupNonUniformShuffle"; + case OpGroupNonUniformShuffleXor: return "OpGroupNonUniformShuffleXor"; + case OpGroupNonUniformShuffleUp: return "OpGroupNonUniformShuffleUp"; + 
case OpGroupNonUniformShuffleDown: return "OpGroupNonUniformShuffleDown"; + case OpGroupNonUniformIAdd: return "OpGroupNonUniformIAdd"; + case OpGroupNonUniformFAdd: return "OpGroupNonUniformFAdd"; + case OpGroupNonUniformIMul: return "OpGroupNonUniformIMul"; + case OpGroupNonUniformFMul: return "OpGroupNonUniformFMul"; + case OpGroupNonUniformSMin: return "OpGroupNonUniformSMin"; + case OpGroupNonUniformUMin: return "OpGroupNonUniformUMin"; + case OpGroupNonUniformFMin: return "OpGroupNonUniformFMin"; + case OpGroupNonUniformSMax: return "OpGroupNonUniformSMax"; + case OpGroupNonUniformUMax: return "OpGroupNonUniformUMax"; + case OpGroupNonUniformFMax: return "OpGroupNonUniformFMax"; + case OpGroupNonUniformBitwiseAnd: return "OpGroupNonUniformBitwiseAnd"; + case OpGroupNonUniformBitwiseOr: return "OpGroupNonUniformBitwiseOr"; + case OpGroupNonUniformBitwiseXor: return "OpGroupNonUniformBitwiseXor"; + case OpGroupNonUniformLogicalAnd: return "OpGroupNonUniformLogicalAnd"; + case OpGroupNonUniformLogicalOr: return "OpGroupNonUniformLogicalOr"; + case OpGroupNonUniformLogicalXor: return "OpGroupNonUniformLogicalXor"; + case OpGroupNonUniformQuadBroadcast: return "OpGroupNonUniformQuadBroadcast"; + case OpGroupNonUniformQuadSwap: return "OpGroupNonUniformQuadSwap"; + case OpCopyLogical: return "OpCopyLogical"; + case OpPtrEqual: return "OpPtrEqual"; + case OpPtrNotEqual: return "OpPtrNotEqual"; + case OpPtrDiff: return "OpPtrDiff"; + case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT"; + case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT"; + case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT"; + case OpTypeTensorARM: return "OpTypeTensorARM"; + case OpTensorReadARM: return "OpTensorReadARM"; + case OpTensorWriteARM: return "OpTensorWriteARM"; + case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM"; + case OpGraphConstantARM: return "OpGraphConstantARM"; + case OpGraphEntryPointARM: return "OpGraphEntryPointARM"; + case OpGraphARM: return "OpGraphARM"; + case OpGraphInputARM: return "OpGraphInputARM"; + case OpGraphSetOutputARM: return "OpGraphSetOutputARM"; + case OpGraphEndARM: return "OpGraphEndARM"; + case OpTypeGraphARM: return "OpTypeGraphARM"; + case OpTerminateInvocation: return "OpTerminateInvocation"; + case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR"; + case OpUntypedVariableKHR: return "OpUntypedVariableKHR"; + case OpUntypedAccessChainKHR: return "OpUntypedAccessChainKHR"; + case OpUntypedInBoundsAccessChainKHR: return "OpUntypedInBoundsAccessChainKHR"; + case OpSubgroupBallotKHR: return "OpSubgroupBallotKHR"; + case OpSubgroupFirstInvocationKHR: return "OpSubgroupFirstInvocationKHR"; + case OpUntypedPtrAccessChainKHR: return "OpUntypedPtrAccessChainKHR"; + case OpUntypedInBoundsPtrAccessChainKHR: return "OpUntypedInBoundsPtrAccessChainKHR"; + case OpUntypedArrayLengthKHR: return "OpUntypedArrayLengthKHR"; + case OpUntypedPrefetchKHR: return "OpUntypedPrefetchKHR"; + case OpSubgroupAllKHR: return "OpSubgroupAllKHR"; + case OpSubgroupAnyKHR: return "OpSubgroupAnyKHR"; + case OpSubgroupAllEqualKHR: return "OpSubgroupAllEqualKHR"; + case OpGroupNonUniformRotateKHR: return "OpGroupNonUniformRotateKHR"; + case OpSubgroupReadInvocationKHR: return "OpSubgroupReadInvocationKHR"; + case OpExtInstWithForwardRefsKHR: return "OpExtInstWithForwardRefsKHR"; + case OpTraceRayKHR: return "OpTraceRayKHR"; + case OpExecuteCallableKHR: return "OpExecuteCallableKHR"; + case OpConvertUToAccelerationStructureKHR: return 
"OpConvertUToAccelerationStructureKHR"; + case OpIgnoreIntersectionKHR: return "OpIgnoreIntersectionKHR"; + case OpTerminateRayKHR: return "OpTerminateRayKHR"; + case OpSDot: return "OpSDot"; + case OpUDot: return "OpUDot"; + case OpSUDot: return "OpSUDot"; + case OpSDotAccSat: return "OpSDotAccSat"; + case OpUDotAccSat: return "OpUDotAccSat"; + case OpSUDotAccSat: return "OpSUDotAccSat"; + case OpTypeCooperativeMatrixKHR: return "OpTypeCooperativeMatrixKHR"; + case OpCooperativeMatrixLoadKHR: return "OpCooperativeMatrixLoadKHR"; + case OpCooperativeMatrixStoreKHR: return "OpCooperativeMatrixStoreKHR"; + case OpCooperativeMatrixMulAddKHR: return "OpCooperativeMatrixMulAddKHR"; + case OpCooperativeMatrixLengthKHR: return "OpCooperativeMatrixLengthKHR"; + case OpConstantCompositeReplicateEXT: return "OpConstantCompositeReplicateEXT"; + case OpSpecConstantCompositeReplicateEXT: return "OpSpecConstantCompositeReplicateEXT"; + case OpCompositeConstructReplicateEXT: return "OpCompositeConstructReplicateEXT"; + case OpTypeRayQueryKHR: return "OpTypeRayQueryKHR"; + case OpRayQueryInitializeKHR: return "OpRayQueryInitializeKHR"; + case OpRayQueryTerminateKHR: return "OpRayQueryTerminateKHR"; + case OpRayQueryGenerateIntersectionKHR: return "OpRayQueryGenerateIntersectionKHR"; + case OpRayQueryConfirmIntersectionKHR: return "OpRayQueryConfirmIntersectionKHR"; + case OpRayQueryProceedKHR: return "OpRayQueryProceedKHR"; + case OpRayQueryGetIntersectionTypeKHR: return "OpRayQueryGetIntersectionTypeKHR"; + case OpImageSampleWeightedQCOM: return "OpImageSampleWeightedQCOM"; + case OpImageBoxFilterQCOM: return "OpImageBoxFilterQCOM"; + case OpImageBlockMatchSSDQCOM: return "OpImageBlockMatchSSDQCOM"; + case OpImageBlockMatchSADQCOM: return "OpImageBlockMatchSADQCOM"; + case OpImageBlockMatchWindowSSDQCOM: return "OpImageBlockMatchWindowSSDQCOM"; + case OpImageBlockMatchWindowSADQCOM: return "OpImageBlockMatchWindowSADQCOM"; + case OpImageBlockMatchGatherSSDQCOM: return "OpImageBlockMatchGatherSSDQCOM"; + case OpImageBlockMatchGatherSADQCOM: return "OpImageBlockMatchGatherSADQCOM"; + case OpGroupIAddNonUniformAMD: return "OpGroupIAddNonUniformAMD"; + case OpGroupFAddNonUniformAMD: return "OpGroupFAddNonUniformAMD"; + case OpGroupFMinNonUniformAMD: return "OpGroupFMinNonUniformAMD"; + case OpGroupUMinNonUniformAMD: return "OpGroupUMinNonUniformAMD"; + case OpGroupSMinNonUniformAMD: return "OpGroupSMinNonUniformAMD"; + case OpGroupFMaxNonUniformAMD: return "OpGroupFMaxNonUniformAMD"; + case OpGroupUMaxNonUniformAMD: return "OpGroupUMaxNonUniformAMD"; + case OpGroupSMaxNonUniformAMD: return "OpGroupSMaxNonUniformAMD"; + case OpFragmentMaskFetchAMD: return "OpFragmentMaskFetchAMD"; + case OpFragmentFetchAMD: return "OpFragmentFetchAMD"; + case OpReadClockKHR: return "OpReadClockKHR"; + case OpAllocateNodePayloadsAMDX: return "OpAllocateNodePayloadsAMDX"; + case OpEnqueueNodePayloadsAMDX: return "OpEnqueueNodePayloadsAMDX"; + case OpTypeNodePayloadArrayAMDX: return "OpTypeNodePayloadArrayAMDX"; + case OpFinishWritingNodePayloadAMDX: return "OpFinishWritingNodePayloadAMDX"; + case OpNodePayloadArrayLengthAMDX: return "OpNodePayloadArrayLengthAMDX"; + case OpIsNodePayloadValidAMDX: return "OpIsNodePayloadValidAMDX"; + case OpConstantStringAMDX: return "OpConstantStringAMDX"; + case OpSpecConstantStringAMDX: return "OpSpecConstantStringAMDX"; + case OpGroupNonUniformQuadAllKHR: return "OpGroupNonUniformQuadAllKHR"; + case OpGroupNonUniformQuadAnyKHR: return "OpGroupNonUniformQuadAnyKHR"; + case 
OpHitObjectRecordHitMotionNV: return "OpHitObjectRecordHitMotionNV"; + case OpHitObjectRecordHitWithIndexMotionNV: return "OpHitObjectRecordHitWithIndexMotionNV"; + case OpHitObjectRecordMissMotionNV: return "OpHitObjectRecordMissMotionNV"; + case OpHitObjectGetWorldToObjectNV: return "OpHitObjectGetWorldToObjectNV"; + case OpHitObjectGetObjectToWorldNV: return "OpHitObjectGetObjectToWorldNV"; + case OpHitObjectGetObjectRayDirectionNV: return "OpHitObjectGetObjectRayDirectionNV"; + case OpHitObjectGetObjectRayOriginNV: return "OpHitObjectGetObjectRayOriginNV"; + case OpHitObjectTraceRayMotionNV: return "OpHitObjectTraceRayMotionNV"; + case OpHitObjectGetShaderRecordBufferHandleNV: return "OpHitObjectGetShaderRecordBufferHandleNV"; + case OpHitObjectGetShaderBindingTableRecordIndexNV: return "OpHitObjectGetShaderBindingTableRecordIndexNV"; + case OpHitObjectRecordEmptyNV: return "OpHitObjectRecordEmptyNV"; + case OpHitObjectTraceRayNV: return "OpHitObjectTraceRayNV"; + case OpHitObjectRecordHitNV: return "OpHitObjectRecordHitNV"; + case OpHitObjectRecordHitWithIndexNV: return "OpHitObjectRecordHitWithIndexNV"; + case OpHitObjectRecordMissNV: return "OpHitObjectRecordMissNV"; + case OpHitObjectExecuteShaderNV: return "OpHitObjectExecuteShaderNV"; + case OpHitObjectGetCurrentTimeNV: return "OpHitObjectGetCurrentTimeNV"; + case OpHitObjectGetAttributesNV: return "OpHitObjectGetAttributesNV"; + case OpHitObjectGetHitKindNV: return "OpHitObjectGetHitKindNV"; + case OpHitObjectGetPrimitiveIndexNV: return "OpHitObjectGetPrimitiveIndexNV"; + case OpHitObjectGetGeometryIndexNV: return "OpHitObjectGetGeometryIndexNV"; + case OpHitObjectGetInstanceIdNV: return "OpHitObjectGetInstanceIdNV"; + case OpHitObjectGetInstanceCustomIndexNV: return "OpHitObjectGetInstanceCustomIndexNV"; + case OpHitObjectGetWorldRayDirectionNV: return "OpHitObjectGetWorldRayDirectionNV"; + case OpHitObjectGetWorldRayOriginNV: return "OpHitObjectGetWorldRayOriginNV"; + case OpHitObjectGetRayTMaxNV: return "OpHitObjectGetRayTMaxNV"; + case OpHitObjectGetRayTMinNV: return "OpHitObjectGetRayTMinNV"; + case OpHitObjectIsEmptyNV: return "OpHitObjectIsEmptyNV"; + case OpHitObjectIsHitNV: return "OpHitObjectIsHitNV"; + case OpHitObjectIsMissNV: return "OpHitObjectIsMissNV"; + case OpReorderThreadWithHitObjectNV: return "OpReorderThreadWithHitObjectNV"; + case OpReorderThreadWithHintNV: return "OpReorderThreadWithHintNV"; + case OpTypeHitObjectNV: return "OpTypeHitObjectNV"; + case OpImageSampleFootprintNV: return "OpImageSampleFootprintNV"; + case OpTypeCooperativeVectorNV: return "OpTypeCooperativeVectorNV"; + case OpCooperativeVectorMatrixMulNV: return "OpCooperativeVectorMatrixMulNV"; + case OpCooperativeVectorOuterProductAccumulateNV: return "OpCooperativeVectorOuterProductAccumulateNV"; + case OpCooperativeVectorReduceSumAccumulateNV: return "OpCooperativeVectorReduceSumAccumulateNV"; + case OpCooperativeVectorMatrixMulAddNV: return "OpCooperativeVectorMatrixMulAddNV"; + case OpCooperativeMatrixConvertNV: return "OpCooperativeMatrixConvertNV"; + case OpEmitMeshTasksEXT: return "OpEmitMeshTasksEXT"; + case OpSetMeshOutputsEXT: return "OpSetMeshOutputsEXT"; + case OpGroupNonUniformPartitionNV: return "OpGroupNonUniformPartitionNV"; + case OpWritePackedPrimitiveIndices4x8NV: return "OpWritePackedPrimitiveIndices4x8NV"; + case OpFetchMicroTriangleVertexPositionNV: return "OpFetchMicroTriangleVertexPositionNV"; + case OpFetchMicroTriangleVertexBarycentricNV: return "OpFetchMicroTriangleVertexBarycentricNV"; + case 
OpCooperativeVectorLoadNV: return "OpCooperativeVectorLoadNV"; + case OpCooperativeVectorStoreNV: return "OpCooperativeVectorStoreNV"; + case OpReportIntersectionKHR: return "OpReportIntersectionKHR"; + case OpIgnoreIntersectionNV: return "OpIgnoreIntersectionNV"; + case OpTerminateRayNV: return "OpTerminateRayNV"; + case OpTraceNV: return "OpTraceNV"; + case OpTraceMotionNV: return "OpTraceMotionNV"; + case OpTraceRayMotionNV: return "OpTraceRayMotionNV"; + case OpRayQueryGetIntersectionTriangleVertexPositionsKHR: return "OpRayQueryGetIntersectionTriangleVertexPositionsKHR"; + case OpTypeAccelerationStructureKHR: return "OpTypeAccelerationStructureKHR"; + case OpExecuteCallableNV: return "OpExecuteCallableNV"; + case OpRayQueryGetClusterIdNV: return "OpRayQueryGetClusterIdNV"; + case OpHitObjectGetClusterIdNV: return "OpHitObjectGetClusterIdNV"; + case OpTypeCooperativeMatrixNV: return "OpTypeCooperativeMatrixNV"; + case OpCooperativeMatrixLoadNV: return "OpCooperativeMatrixLoadNV"; + case OpCooperativeMatrixStoreNV: return "OpCooperativeMatrixStoreNV"; + case OpCooperativeMatrixMulAddNV: return "OpCooperativeMatrixMulAddNV"; + case OpCooperativeMatrixLengthNV: return "OpCooperativeMatrixLengthNV"; + case OpBeginInvocationInterlockEXT: return "OpBeginInvocationInterlockEXT"; + case OpEndInvocationInterlockEXT: return "OpEndInvocationInterlockEXT"; + case OpCooperativeMatrixReduceNV: return "OpCooperativeMatrixReduceNV"; + case OpCooperativeMatrixLoadTensorNV: return "OpCooperativeMatrixLoadTensorNV"; + case OpCooperativeMatrixStoreTensorNV: return "OpCooperativeMatrixStoreTensorNV"; + case OpCooperativeMatrixPerElementOpNV: return "OpCooperativeMatrixPerElementOpNV"; + case OpTypeTensorLayoutNV: return "OpTypeTensorLayoutNV"; + case OpTypeTensorViewNV: return "OpTypeTensorViewNV"; + case OpCreateTensorLayoutNV: return "OpCreateTensorLayoutNV"; + case OpTensorLayoutSetDimensionNV: return "OpTensorLayoutSetDimensionNV"; + case OpTensorLayoutSetStrideNV: return "OpTensorLayoutSetStrideNV"; + case OpTensorLayoutSliceNV: return "OpTensorLayoutSliceNV"; + case OpTensorLayoutSetClampValueNV: return "OpTensorLayoutSetClampValueNV"; + case OpCreateTensorViewNV: return "OpCreateTensorViewNV"; + case OpTensorViewSetDimensionNV: return "OpTensorViewSetDimensionNV"; + case OpTensorViewSetStrideNV: return "OpTensorViewSetStrideNV"; + case OpDemoteToHelperInvocation: return "OpDemoteToHelperInvocation"; + case OpIsHelperInvocationEXT: return "OpIsHelperInvocationEXT"; + case OpTensorViewSetClipNV: return "OpTensorViewSetClipNV"; + case OpTensorLayoutSetBlockSizeNV: return "OpTensorLayoutSetBlockSizeNV"; + case OpCooperativeMatrixTransposeNV: return "OpCooperativeMatrixTransposeNV"; + case OpConvertUToImageNV: return "OpConvertUToImageNV"; + case OpConvertUToSamplerNV: return "OpConvertUToSamplerNV"; + case OpConvertImageToUNV: return "OpConvertImageToUNV"; + case OpConvertSamplerToUNV: return "OpConvertSamplerToUNV"; + case OpConvertUToSampledImageNV: return "OpConvertUToSampledImageNV"; + case OpConvertSampledImageToUNV: return "OpConvertSampledImageToUNV"; + case OpSamplerImageAddressingModeNV: return "OpSamplerImageAddressingModeNV"; + case OpRawAccessChainNV: return "OpRawAccessChainNV"; + case OpRayQueryGetIntersectionSpherePositionNV: return "OpRayQueryGetIntersectionSpherePositionNV"; + case OpRayQueryGetIntersectionSphereRadiusNV: return "OpRayQueryGetIntersectionSphereRadiusNV"; + case OpRayQueryGetIntersectionLSSPositionsNV: return "OpRayQueryGetIntersectionLSSPositionsNV"; + case 
OpRayQueryGetIntersectionLSSRadiiNV: return "OpRayQueryGetIntersectionLSSRadiiNV"; + case OpRayQueryGetIntersectionLSSHitValueNV: return "OpRayQueryGetIntersectionLSSHitValueNV"; + case OpHitObjectGetSpherePositionNV: return "OpHitObjectGetSpherePositionNV"; + case OpHitObjectGetSphereRadiusNV: return "OpHitObjectGetSphereRadiusNV"; + case OpHitObjectGetLSSPositionsNV: return "OpHitObjectGetLSSPositionsNV"; + case OpHitObjectGetLSSRadiiNV: return "OpHitObjectGetLSSRadiiNV"; + case OpHitObjectIsSphereHitNV: return "OpHitObjectIsSphereHitNV"; + case OpHitObjectIsLSSHitNV: return "OpHitObjectIsLSSHitNV"; + case OpRayQueryIsSphereHitNV: return "OpRayQueryIsSphereHitNV"; + case OpRayQueryIsLSSHitNV: return "OpRayQueryIsLSSHitNV"; + case OpSubgroupShuffleINTEL: return "OpSubgroupShuffleINTEL"; + case OpSubgroupShuffleDownINTEL: return "OpSubgroupShuffleDownINTEL"; + case OpSubgroupShuffleUpINTEL: return "OpSubgroupShuffleUpINTEL"; + case OpSubgroupShuffleXorINTEL: return "OpSubgroupShuffleXorINTEL"; + case OpSubgroupBlockReadINTEL: return "OpSubgroupBlockReadINTEL"; + case OpSubgroupBlockWriteINTEL: return "OpSubgroupBlockWriteINTEL"; + case OpSubgroupImageBlockReadINTEL: return "OpSubgroupImageBlockReadINTEL"; + case OpSubgroupImageBlockWriteINTEL: return "OpSubgroupImageBlockWriteINTEL"; + case OpSubgroupImageMediaBlockReadINTEL: return "OpSubgroupImageMediaBlockReadINTEL"; + case OpSubgroupImageMediaBlockWriteINTEL: return "OpSubgroupImageMediaBlockWriteINTEL"; + case OpUCountLeadingZerosINTEL: return "OpUCountLeadingZerosINTEL"; + case OpUCountTrailingZerosINTEL: return "OpUCountTrailingZerosINTEL"; + case OpAbsISubINTEL: return "OpAbsISubINTEL"; + case OpAbsUSubINTEL: return "OpAbsUSubINTEL"; + case OpIAddSatINTEL: return "OpIAddSatINTEL"; + case OpUAddSatINTEL: return "OpUAddSatINTEL"; + case OpIAverageINTEL: return "OpIAverageINTEL"; + case OpUAverageINTEL: return "OpUAverageINTEL"; + case OpIAverageRoundedINTEL: return "OpIAverageRoundedINTEL"; + case OpUAverageRoundedINTEL: return "OpUAverageRoundedINTEL"; + case OpISubSatINTEL: return "OpISubSatINTEL"; + case OpUSubSatINTEL: return "OpUSubSatINTEL"; + case OpIMul32x16INTEL: return "OpIMul32x16INTEL"; + case OpUMul32x16INTEL: return "OpUMul32x16INTEL"; + case OpConstantFunctionPointerINTEL: return "OpConstantFunctionPointerINTEL"; + case OpFunctionPointerCallINTEL: return "OpFunctionPointerCallINTEL"; + case OpAsmTargetINTEL: return "OpAsmTargetINTEL"; + case OpAsmINTEL: return "OpAsmINTEL"; + case OpAsmCallINTEL: return "OpAsmCallINTEL"; + case OpAtomicFMinEXT: return "OpAtomicFMinEXT"; + case OpAtomicFMaxEXT: return "OpAtomicFMaxEXT"; + case OpAssumeTrueKHR: return "OpAssumeTrueKHR"; + case OpExpectKHR: return "OpExpectKHR"; + case OpDecorateString: return "OpDecorateString"; + case OpMemberDecorateString: return "OpMemberDecorateString"; + case OpVmeImageINTEL: return "OpVmeImageINTEL"; + case OpTypeVmeImageINTEL: return "OpTypeVmeImageINTEL"; + case OpTypeAvcImePayloadINTEL: return "OpTypeAvcImePayloadINTEL"; + case OpTypeAvcRefPayloadINTEL: return "OpTypeAvcRefPayloadINTEL"; + case OpTypeAvcSicPayloadINTEL: return "OpTypeAvcSicPayloadINTEL"; + case OpTypeAvcMcePayloadINTEL: return "OpTypeAvcMcePayloadINTEL"; + case OpTypeAvcMceResultINTEL: return "OpTypeAvcMceResultINTEL"; + case OpTypeAvcImeResultINTEL: return "OpTypeAvcImeResultINTEL"; + case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: return "OpTypeAvcImeResultSingleReferenceStreamoutINTEL"; + case OpTypeAvcImeResultDualReferenceStreamoutINTEL: return 
"OpTypeAvcImeResultDualReferenceStreamoutINTEL"; + case OpTypeAvcImeSingleReferenceStreaminINTEL: return "OpTypeAvcImeSingleReferenceStreaminINTEL"; + case OpTypeAvcImeDualReferenceStreaminINTEL: return "OpTypeAvcImeDualReferenceStreaminINTEL"; + case OpTypeAvcRefResultINTEL: return "OpTypeAvcRefResultINTEL"; + case OpTypeAvcSicResultINTEL: return "OpTypeAvcSicResultINTEL"; + case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL"; + case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: return "OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL"; + case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL"; + case OpSubgroupAvcMceSetInterShapePenaltyINTEL: return "OpSubgroupAvcMceSetInterShapePenaltyINTEL"; + case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: return "OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL"; + case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: return "OpSubgroupAvcMceSetInterDirectionPenaltyINTEL"; + case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL"; + case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: return "OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL"; + case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: return "OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL"; + case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: return "OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL"; + case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: return "OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL"; + case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: return "OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL"; + case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL"; + case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: return "OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL"; + case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL"; + case OpSubgroupAvcMceSetAcOnlyHaarINTEL: return "OpSubgroupAvcMceSetAcOnlyHaarINTEL"; + case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: return "OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL"; + case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: return "OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL"; + case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: return "OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL"; + case OpSubgroupAvcMceConvertToImePayloadINTEL: return "OpSubgroupAvcMceConvertToImePayloadINTEL"; + case OpSubgroupAvcMceConvertToImeResultINTEL: return "OpSubgroupAvcMceConvertToImeResultINTEL"; + case OpSubgroupAvcMceConvertToRefPayloadINTEL: return "OpSubgroupAvcMceConvertToRefPayloadINTEL"; + case OpSubgroupAvcMceConvertToRefResultINTEL: return "OpSubgroupAvcMceConvertToRefResultINTEL"; + case OpSubgroupAvcMceConvertToSicPayloadINTEL: return "OpSubgroupAvcMceConvertToSicPayloadINTEL"; + case OpSubgroupAvcMceConvertToSicResultINTEL: return "OpSubgroupAvcMceConvertToSicResultINTEL"; + case OpSubgroupAvcMceGetMotionVectorsINTEL: return "OpSubgroupAvcMceGetMotionVectorsINTEL"; + case OpSubgroupAvcMceGetInterDistortionsINTEL: return "OpSubgroupAvcMceGetInterDistortionsINTEL"; + case 
OpSubgroupAvcMceGetBestInterDistortionsINTEL: return "OpSubgroupAvcMceGetBestInterDistortionsINTEL"; + case OpSubgroupAvcMceGetInterMajorShapeINTEL: return "OpSubgroupAvcMceGetInterMajorShapeINTEL"; + case OpSubgroupAvcMceGetInterMinorShapeINTEL: return "OpSubgroupAvcMceGetInterMinorShapeINTEL"; + case OpSubgroupAvcMceGetInterDirectionsINTEL: return "OpSubgroupAvcMceGetInterDirectionsINTEL"; + case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: return "OpSubgroupAvcMceGetInterMotionVectorCountINTEL"; + case OpSubgroupAvcMceGetInterReferenceIdsINTEL: return "OpSubgroupAvcMceGetInterReferenceIdsINTEL"; + case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: return "OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL"; + case OpSubgroupAvcImeInitializeINTEL: return "OpSubgroupAvcImeInitializeINTEL"; + case OpSubgroupAvcImeSetSingleReferenceINTEL: return "OpSubgroupAvcImeSetSingleReferenceINTEL"; + case OpSubgroupAvcImeSetDualReferenceINTEL: return "OpSubgroupAvcImeSetDualReferenceINTEL"; + case OpSubgroupAvcImeRefWindowSizeINTEL: return "OpSubgroupAvcImeRefWindowSizeINTEL"; + case OpSubgroupAvcImeAdjustRefOffsetINTEL: return "OpSubgroupAvcImeAdjustRefOffsetINTEL"; + case OpSubgroupAvcImeConvertToMcePayloadINTEL: return "OpSubgroupAvcImeConvertToMcePayloadINTEL"; + case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: return "OpSubgroupAvcImeSetMaxMotionVectorCountINTEL"; + case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: return "OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL"; + case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: return "OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL"; + case OpSubgroupAvcImeSetWeightedSadINTEL: return "OpSubgroupAvcImeSetWeightedSadINTEL"; + case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL"; + case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceINTEL"; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL"; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL"; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL"; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL"; + case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL"; + case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL"; + case OpSubgroupAvcImeConvertToMceResultINTEL: return "OpSubgroupAvcImeConvertToMceResultINTEL"; + case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: return "OpSubgroupAvcImeGetSingleReferenceStreaminINTEL"; + case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: return "OpSubgroupAvcImeGetDualReferenceStreaminINTEL"; + case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: return "OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL"; + case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: return "OpSubgroupAvcImeStripDualReferenceStreamoutINTEL"; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: return "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL"; + case 
OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: return "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL"; + case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: return "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL"; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: return "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL"; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: return "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL"; + case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: return "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL"; + case OpSubgroupAvcImeGetBorderReachedINTEL: return "OpSubgroupAvcImeGetBorderReachedINTEL"; + case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: return "OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL"; + case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: return "OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL"; + case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: return "OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL"; + case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: return "OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL"; + case OpSubgroupAvcFmeInitializeINTEL: return "OpSubgroupAvcFmeInitializeINTEL"; + case OpSubgroupAvcBmeInitializeINTEL: return "OpSubgroupAvcBmeInitializeINTEL"; + case OpSubgroupAvcRefConvertToMcePayloadINTEL: return "OpSubgroupAvcRefConvertToMcePayloadINTEL"; + case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: return "OpSubgroupAvcRefSetBidirectionalMixDisableINTEL"; + case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: return "OpSubgroupAvcRefSetBilinearFilterEnableINTEL"; + case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: return "OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL"; + case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: return "OpSubgroupAvcRefEvaluateWithDualReferenceINTEL"; + case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: return "OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL"; + case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: return "OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL"; + case OpSubgroupAvcRefConvertToMceResultINTEL: return "OpSubgroupAvcRefConvertToMceResultINTEL"; + case OpSubgroupAvcSicInitializeINTEL: return "OpSubgroupAvcSicInitializeINTEL"; + case OpSubgroupAvcSicConfigureSkcINTEL: return "OpSubgroupAvcSicConfigureSkcINTEL"; + case OpSubgroupAvcSicConfigureIpeLumaINTEL: return "OpSubgroupAvcSicConfigureIpeLumaINTEL"; + case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: return "OpSubgroupAvcSicConfigureIpeLumaChromaINTEL"; + case OpSubgroupAvcSicGetMotionVectorMaskINTEL: return "OpSubgroupAvcSicGetMotionVectorMaskINTEL"; + case OpSubgroupAvcSicConvertToMcePayloadINTEL: return "OpSubgroupAvcSicConvertToMcePayloadINTEL"; + case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: return "OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL"; + case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: return "OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL"; + case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: return "OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL"; + case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: return "OpSubgroupAvcSicSetBilinearFilterEnableINTEL"; + case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: return 
"OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL"; + case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: return "OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL"; + case OpSubgroupAvcSicEvaluateIpeINTEL: return "OpSubgroupAvcSicEvaluateIpeINTEL"; + case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: return "OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL"; + case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: return "OpSubgroupAvcSicEvaluateWithDualReferenceINTEL"; + case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: return "OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL"; + case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: return "OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL"; + case OpSubgroupAvcSicConvertToMceResultINTEL: return "OpSubgroupAvcSicConvertToMceResultINTEL"; + case OpSubgroupAvcSicGetIpeLumaShapeINTEL: return "OpSubgroupAvcSicGetIpeLumaShapeINTEL"; + case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: return "OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL"; + case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: return "OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL"; + case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: return "OpSubgroupAvcSicGetPackedIpeLumaModesINTEL"; + case OpSubgroupAvcSicGetIpeChromaModeINTEL: return "OpSubgroupAvcSicGetIpeChromaModeINTEL"; + case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: return "OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL"; + case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: return "OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL"; + case OpSubgroupAvcSicGetInterRawSadsINTEL: return "OpSubgroupAvcSicGetInterRawSadsINTEL"; + case OpVariableLengthArrayINTEL: return "OpVariableLengthArrayINTEL"; + case OpSaveMemoryINTEL: return "OpSaveMemoryINTEL"; + case OpRestoreMemoryINTEL: return "OpRestoreMemoryINTEL"; + case OpArbitraryFloatSinCosPiINTEL: return "OpArbitraryFloatSinCosPiINTEL"; + case OpArbitraryFloatCastINTEL: return "OpArbitraryFloatCastINTEL"; + case OpArbitraryFloatCastFromIntINTEL: return "OpArbitraryFloatCastFromIntINTEL"; + case OpArbitraryFloatCastToIntINTEL: return "OpArbitraryFloatCastToIntINTEL"; + case OpArbitraryFloatAddINTEL: return "OpArbitraryFloatAddINTEL"; + case OpArbitraryFloatSubINTEL: return "OpArbitraryFloatSubINTEL"; + case OpArbitraryFloatMulINTEL: return "OpArbitraryFloatMulINTEL"; + case OpArbitraryFloatDivINTEL: return "OpArbitraryFloatDivINTEL"; + case OpArbitraryFloatGTINTEL: return "OpArbitraryFloatGTINTEL"; + case OpArbitraryFloatGEINTEL: return "OpArbitraryFloatGEINTEL"; + case OpArbitraryFloatLTINTEL: return "OpArbitraryFloatLTINTEL"; + case OpArbitraryFloatLEINTEL: return "OpArbitraryFloatLEINTEL"; + case OpArbitraryFloatEQINTEL: return "OpArbitraryFloatEQINTEL"; + case OpArbitraryFloatRecipINTEL: return "OpArbitraryFloatRecipINTEL"; + case OpArbitraryFloatRSqrtINTEL: return "OpArbitraryFloatRSqrtINTEL"; + case OpArbitraryFloatCbrtINTEL: return "OpArbitraryFloatCbrtINTEL"; + case OpArbitraryFloatHypotINTEL: return "OpArbitraryFloatHypotINTEL"; + case OpArbitraryFloatSqrtINTEL: return "OpArbitraryFloatSqrtINTEL"; + case OpArbitraryFloatLogINTEL: return "OpArbitraryFloatLogINTEL"; + case OpArbitraryFloatLog2INTEL: return "OpArbitraryFloatLog2INTEL"; + case OpArbitraryFloatLog10INTEL: return "OpArbitraryFloatLog10INTEL"; + case OpArbitraryFloatLog1pINTEL: return "OpArbitraryFloatLog1pINTEL"; + case OpArbitraryFloatExpINTEL: return "OpArbitraryFloatExpINTEL"; + case OpArbitraryFloatExp2INTEL: return "OpArbitraryFloatExp2INTEL"; + case 
OpArbitraryFloatExp10INTEL: return "OpArbitraryFloatExp10INTEL"; + case OpArbitraryFloatExpm1INTEL: return "OpArbitraryFloatExpm1INTEL"; + case OpArbitraryFloatSinINTEL: return "OpArbitraryFloatSinINTEL"; + case OpArbitraryFloatCosINTEL: return "OpArbitraryFloatCosINTEL"; + case OpArbitraryFloatSinCosINTEL: return "OpArbitraryFloatSinCosINTEL"; + case OpArbitraryFloatSinPiINTEL: return "OpArbitraryFloatSinPiINTEL"; + case OpArbitraryFloatCosPiINTEL: return "OpArbitraryFloatCosPiINTEL"; + case OpArbitraryFloatASinINTEL: return "OpArbitraryFloatASinINTEL"; + case OpArbitraryFloatASinPiINTEL: return "OpArbitraryFloatASinPiINTEL"; + case OpArbitraryFloatACosINTEL: return "OpArbitraryFloatACosINTEL"; + case OpArbitraryFloatACosPiINTEL: return "OpArbitraryFloatACosPiINTEL"; + case OpArbitraryFloatATanINTEL: return "OpArbitraryFloatATanINTEL"; + case OpArbitraryFloatATanPiINTEL: return "OpArbitraryFloatATanPiINTEL"; + case OpArbitraryFloatATan2INTEL: return "OpArbitraryFloatATan2INTEL"; + case OpArbitraryFloatPowINTEL: return "OpArbitraryFloatPowINTEL"; + case OpArbitraryFloatPowRINTEL: return "OpArbitraryFloatPowRINTEL"; + case OpArbitraryFloatPowNINTEL: return "OpArbitraryFloatPowNINTEL"; + case OpLoopControlINTEL: return "OpLoopControlINTEL"; + case OpAliasDomainDeclINTEL: return "OpAliasDomainDeclINTEL"; + case OpAliasScopeDeclINTEL: return "OpAliasScopeDeclINTEL"; + case OpAliasScopeListDeclINTEL: return "OpAliasScopeListDeclINTEL"; + case OpFixedSqrtINTEL: return "OpFixedSqrtINTEL"; + case OpFixedRecipINTEL: return "OpFixedRecipINTEL"; + case OpFixedRsqrtINTEL: return "OpFixedRsqrtINTEL"; + case OpFixedSinINTEL: return "OpFixedSinINTEL"; + case OpFixedCosINTEL: return "OpFixedCosINTEL"; + case OpFixedSinCosINTEL: return "OpFixedSinCosINTEL"; + case OpFixedSinPiINTEL: return "OpFixedSinPiINTEL"; + case OpFixedCosPiINTEL: return "OpFixedCosPiINTEL"; + case OpFixedSinCosPiINTEL: return "OpFixedSinCosPiINTEL"; + case OpFixedLogINTEL: return "OpFixedLogINTEL"; + case OpFixedExpINTEL: return "OpFixedExpINTEL"; + case OpPtrCastToCrossWorkgroupINTEL: return "OpPtrCastToCrossWorkgroupINTEL"; + case OpCrossWorkgroupCastToPtrINTEL: return "OpCrossWorkgroupCastToPtrINTEL"; + case OpReadPipeBlockingINTEL: return "OpReadPipeBlockingINTEL"; + case OpWritePipeBlockingINTEL: return "OpWritePipeBlockingINTEL"; + case OpFPGARegINTEL: return "OpFPGARegINTEL"; + case OpRayQueryGetRayTMinKHR: return "OpRayQueryGetRayTMinKHR"; + case OpRayQueryGetRayFlagsKHR: return "OpRayQueryGetRayFlagsKHR"; + case OpRayQueryGetIntersectionTKHR: return "OpRayQueryGetIntersectionTKHR"; + case OpRayQueryGetIntersectionInstanceCustomIndexKHR: return "OpRayQueryGetIntersectionInstanceCustomIndexKHR"; + case OpRayQueryGetIntersectionInstanceIdKHR: return "OpRayQueryGetIntersectionInstanceIdKHR"; + case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: return "OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR"; + case OpRayQueryGetIntersectionGeometryIndexKHR: return "OpRayQueryGetIntersectionGeometryIndexKHR"; + case OpRayQueryGetIntersectionPrimitiveIndexKHR: return "OpRayQueryGetIntersectionPrimitiveIndexKHR"; + case OpRayQueryGetIntersectionBarycentricsKHR: return "OpRayQueryGetIntersectionBarycentricsKHR"; + case OpRayQueryGetIntersectionFrontFaceKHR: return "OpRayQueryGetIntersectionFrontFaceKHR"; + case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: return "OpRayQueryGetIntersectionCandidateAABBOpaqueKHR"; + case OpRayQueryGetIntersectionObjectRayDirectionKHR: return 
"OpRayQueryGetIntersectionObjectRayDirectionKHR"; + case OpRayQueryGetIntersectionObjectRayOriginKHR: return "OpRayQueryGetIntersectionObjectRayOriginKHR"; + case OpRayQueryGetWorldRayDirectionKHR: return "OpRayQueryGetWorldRayDirectionKHR"; + case OpRayQueryGetWorldRayOriginKHR: return "OpRayQueryGetWorldRayOriginKHR"; + case OpRayQueryGetIntersectionObjectToWorldKHR: return "OpRayQueryGetIntersectionObjectToWorldKHR"; + case OpRayQueryGetIntersectionWorldToObjectKHR: return "OpRayQueryGetIntersectionWorldToObjectKHR"; + case OpAtomicFAddEXT: return "OpAtomicFAddEXT"; + case OpTypeBufferSurfaceINTEL: return "OpTypeBufferSurfaceINTEL"; + case OpTypeStructContinuedINTEL: return "OpTypeStructContinuedINTEL"; + case OpConstantCompositeContinuedINTEL: return "OpConstantCompositeContinuedINTEL"; + case OpSpecConstantCompositeContinuedINTEL: return "OpSpecConstantCompositeContinuedINTEL"; + case OpCompositeConstructContinuedINTEL: return "OpCompositeConstructContinuedINTEL"; + case OpConvertFToBF16INTEL: return "OpConvertFToBF16INTEL"; + case OpConvertBF16ToFINTEL: return "OpConvertBF16ToFINTEL"; + case OpControlBarrierArriveINTEL: return "OpControlBarrierArriveINTEL"; + case OpControlBarrierWaitINTEL: return "OpControlBarrierWaitINTEL"; + case OpArithmeticFenceEXT: return "OpArithmeticFenceEXT"; + case OpTaskSequenceCreateINTEL: return "OpTaskSequenceCreateINTEL"; + case OpTaskSequenceAsyncINTEL: return "OpTaskSequenceAsyncINTEL"; + case OpTaskSequenceGetINTEL: return "OpTaskSequenceGetINTEL"; + case OpTaskSequenceReleaseINTEL: return "OpTaskSequenceReleaseINTEL"; + case OpTypeTaskSequenceINTEL: return "OpTypeTaskSequenceINTEL"; + case OpSubgroupBlockPrefetchINTEL: return "OpSubgroupBlockPrefetchINTEL"; + case OpSubgroup2DBlockLoadINTEL: return "OpSubgroup2DBlockLoadINTEL"; + case OpSubgroup2DBlockLoadTransformINTEL: return "OpSubgroup2DBlockLoadTransformINTEL"; + case OpSubgroup2DBlockLoadTransposeINTEL: return "OpSubgroup2DBlockLoadTransposeINTEL"; + case OpSubgroup2DBlockPrefetchINTEL: return "OpSubgroup2DBlockPrefetchINTEL"; + case OpSubgroup2DBlockStoreINTEL: return "OpSubgroup2DBlockStoreINTEL"; + case OpSubgroupMatrixMultiplyAccumulateINTEL: return "OpSubgroupMatrixMultiplyAccumulateINTEL"; + case OpBitwiseFunctionINTEL: return "OpBitwiseFunctionINTEL"; + case OpGroupIMulKHR: return "OpGroupIMulKHR"; + case OpGroupFMulKHR: return "OpGroupFMulKHR"; + case OpGroupBitwiseAndKHR: return "OpGroupBitwiseAndKHR"; + case OpGroupBitwiseOrKHR: return "OpGroupBitwiseOrKHR"; + case OpGroupBitwiseXorKHR: return "OpGroupBitwiseXorKHR"; + case OpGroupLogicalAndKHR: return "OpGroupLogicalAndKHR"; + case OpGroupLogicalOrKHR: return "OpGroupLogicalOrKHR"; + case OpGroupLogicalXorKHR: return "OpGroupLogicalXorKHR"; + case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL"; + case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL"; + case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL"; + case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL"; + case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL"; + case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL"; + default: return "Unknown"; + } +} + #endif /* SPV_ENABLE_UTILITY_CODE */ -// Overload operator| for mask bit combining +// Overload bitwise operators for mask bit combining inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); } +inline ImageOperandsMask operator&(ImageOperandsMask a, ImageOperandsMask b) { 
return ImageOperandsMask(unsigned(a) & unsigned(b)); } +inline ImageOperandsMask operator^(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) ^ unsigned(b)); } +inline ImageOperandsMask operator~(ImageOperandsMask a) { return ImageOperandsMask(~unsigned(a)); } inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); } +inline FPFastMathModeMask operator&(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) & unsigned(b)); } +inline FPFastMathModeMask operator^(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) ^ unsigned(b)); } +inline FPFastMathModeMask operator~(FPFastMathModeMask a) { return FPFastMathModeMask(~unsigned(a)); } inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); } +inline SelectionControlMask operator&(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) & unsigned(b)); } +inline SelectionControlMask operator^(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) ^ unsigned(b)); } +inline SelectionControlMask operator~(SelectionControlMask a) { return SelectionControlMask(~unsigned(a)); } inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); } +inline LoopControlMask operator&(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) & unsigned(b)); } +inline LoopControlMask operator^(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) ^ unsigned(b)); } +inline LoopControlMask operator~(LoopControlMask a) { return LoopControlMask(~unsigned(a)); } inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); } +inline FunctionControlMask operator&(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) & unsigned(b)); } +inline FunctionControlMask operator^(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) ^ unsigned(b)); } +inline FunctionControlMask operator~(FunctionControlMask a) { return FunctionControlMask(~unsigned(a)); } inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); } +inline MemorySemanticsMask operator&(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) & unsigned(b)); } +inline MemorySemanticsMask operator^(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) ^ unsigned(b)); } +inline MemorySemanticsMask operator~(MemorySemanticsMask a) { return MemorySemanticsMask(~unsigned(a)); } inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); } +inline MemoryAccessMask operator&(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) & unsigned(b)); } +inline MemoryAccessMask operator^(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) ^ unsigned(b)); } +inline MemoryAccessMask operator~(MemoryAccessMask a) { return MemoryAccessMask(~unsigned(a)); } inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); } 
+inline KernelProfilingInfoMask operator&(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) & unsigned(b)); } +inline KernelProfilingInfoMask operator^(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) ^ unsigned(b)); } +inline KernelProfilingInfoMask operator~(KernelProfilingInfoMask a) { return KernelProfilingInfoMask(~unsigned(a)); } inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); } +inline RayFlagsMask operator&(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) & unsigned(b)); } +inline RayFlagsMask operator^(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) ^ unsigned(b)); } +inline RayFlagsMask operator~(RayFlagsMask a) { return RayFlagsMask(~unsigned(a)); } inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); } +inline FragmentShadingRateMask operator&(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) & unsigned(b)); } +inline FragmentShadingRateMask operator^(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) ^ unsigned(b)); } +inline FragmentShadingRateMask operator~(FragmentShadingRateMask a) { return FragmentShadingRateMask(~unsigned(a)); } +inline CooperativeMatrixOperandsMask operator|(CooperativeMatrixOperandsMask a, CooperativeMatrixOperandsMask b) { return CooperativeMatrixOperandsMask(unsigned(a) | unsigned(b)); } +inline CooperativeMatrixOperandsMask operator&(CooperativeMatrixOperandsMask a, CooperativeMatrixOperandsMask b) { return CooperativeMatrixOperandsMask(unsigned(a) & unsigned(b)); } +inline CooperativeMatrixOperandsMask operator^(CooperativeMatrixOperandsMask a, CooperativeMatrixOperandsMask b) { return CooperativeMatrixOperandsMask(unsigned(a) ^ unsigned(b)); } +inline CooperativeMatrixOperandsMask operator~(CooperativeMatrixOperandsMask a) { return CooperativeMatrixOperandsMask(~unsigned(a)); } +inline CooperativeMatrixReduceMask operator|(CooperativeMatrixReduceMask a, CooperativeMatrixReduceMask b) { return CooperativeMatrixReduceMask(unsigned(a) | unsigned(b)); } +inline CooperativeMatrixReduceMask operator&(CooperativeMatrixReduceMask a, CooperativeMatrixReduceMask b) { return CooperativeMatrixReduceMask(unsigned(a) & unsigned(b)); } +inline CooperativeMatrixReduceMask operator^(CooperativeMatrixReduceMask a, CooperativeMatrixReduceMask b) { return CooperativeMatrixReduceMask(unsigned(a) ^ unsigned(b)); } +inline CooperativeMatrixReduceMask operator~(CooperativeMatrixReduceMask a) { return CooperativeMatrixReduceMask(~unsigned(a)); } +inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) | unsigned(b)); } +inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); } +inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); } +inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); } +inline TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask 
b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }
+inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }
+inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }
+inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
+inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
+inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline MatrixMultiplyAccumulateOperandsMask operator~(MatrixMultiplyAccumulateOperandsMask a) { return MatrixMultiplyAccumulateOperandsMask(~unsigned(a)); }
+inline RawAccessChainOperandsMask operator|(RawAccessChainOperandsMask a, RawAccessChainOperandsMask b) { return RawAccessChainOperandsMask(unsigned(a) | unsigned(b)); }
+inline RawAccessChainOperandsMask operator&(RawAccessChainOperandsMask a, RawAccessChainOperandsMask b) { return RawAccessChainOperandsMask(unsigned(a) & unsigned(b)); }
+inline RawAccessChainOperandsMask operator^(RawAccessChainOperandsMask a, RawAccessChainOperandsMask b) { return RawAccessChainOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline RawAccessChainOperandsMask operator~(RawAccessChainOperandsMask a) { return RawAccessChainOperandsMask(~unsigned(a)); }
 } // end namespace spv
diff --git a/thirdparty/spirv-cross/spirv_common.hpp b/thirdparty/spirv-cross/spirv_common.hpp
index b70536d9ec..4780d2750f 100644
--- a/thirdparty/spirv-cross/spirv_common.hpp
+++ b/thirdparty/spirv-cross/spirv_common.hpp
@@ -580,7 +580,10 @@ struct SPIRType : IVariant
 		Interpolant,
 		Char,
 		// MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
-		MeshGridProperties
+		MeshGridProperties,
+		BFloat16,
+		FloatE4M3,
+		FloatE5M2
 	};

 	// Scalar/vector/matrix support.
@@ -605,6 +608,14 @@ struct SPIRType : IVariant
 	bool pointer = false;
 	bool forward_pointer = false;

+	struct
+	{
+		uint32_t use_id = 0;
+		uint32_t rows_id = 0;
+		uint32_t columns_id = 0;
+		uint32_t scope_id = 0;
+	} cooperative;
+
 	spv::StorageClass storage = spv::StorageClassGeneric;

 	SmallVector<TypeID> member_types;
@@ -686,6 +697,7 @@ struct SPIREntryPoint
 	FunctionID self = 0;
 	std::string name;
 	std::string orig_name;
+	std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
 	SmallVector<VariableID> interface_variables;

 	Bitset flags;
@@ -1026,6 +1038,9 @@ struct SPIRFunction : IVariant
 	// consider arrays value types.
 	SmallVector<ID> constant_arrays_needed_on_stack;

+	// Does this function (or any function called by it) emit geometry?
+	bool emits_geometry = false;
+
 	bool active = false;
 	bool flush_undeclared = true;
 	bool do_combined_parameters = true;
@@ -1226,6 +1241,26 @@ struct SPIRConstant : IVariant
 		return u.f32;
 	}

+	static inline float fe4m3_to_f32(uint8_t v)
+	{
+		if ((v & 0x7f) == 0x7f)
+		{
+			union
+			{
+				float f32;
+				uint32_t u32;
+			} u;
+
+			u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
+			return u.f32;
+		}
+		else
+		{
+			// Reuse the FP16 to FP32 code. Cute bit-hackery.
+			return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
+		}
+	}
+
 	inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
 	{
 		return m.c[col].id[row];
@@ -1266,6 +1301,24 @@ struct SPIRConstant : IVariant
 		return f16_to_f32(scalar_u16(col, row));
 	}

+	inline float scalar_bf16(uint32_t col = 0, uint32_t row = 0) const
+	{
+		uint32_t v = scalar_u16(col, row) << 16;
+		float fp32;
+		memcpy(&fp32, &v, sizeof(float));
+		return fp32;
+	}
+
+	inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return fe4m3_to_f32(scalar_u8(col, row));
+	}
+
+	inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
+	{
+		return f16_to_f32(scalar_u8(col, row) << 8);
+	}
+
 	inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
 	{
 		return m.c[col].r[row].f32;
@@ -1336,9 +1389,10 @@ struct SPIRConstant : IVariant

 	SPIRConstant() = default;

-	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
+	SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
 	    : constant_type(constant_type_)
 	    , specialization(specialized)
+	    , replicated(replicated_)
 	{
 		subconstants.reserve(num_elements);
 		for (uint32_t i = 0; i < num_elements; i++)
@@ -1410,9 +1464,16 @@ struct SPIRConstant : IVariant
 	// If true, this is a LUT, and should always be declared in the outer scope.
 	bool is_used_as_lut = false;

+	// If this is a null constant of array type with specialized length.
+	// May require special handling in initializer.
+	bool is_null_array_specialized_length = false;
+
 	// For composites which are constant arrays, etc.
 	SmallVector<ConstantID> subconstants;

+	// Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
+	bool replicated = false;
+
 	// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
 	// and uses them to initialize the constant. This allows the user
 	// to still be able to specialize the value by supplying corresponding
@@ -1708,6 +1769,7 @@ struct Meta
 		uint32_t spec_id = 0;
 		uint32_t index = 0;
 		spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
+		spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
 		bool builtin = false;
 		bool qualified_alias_explicit_override = false;
diff --git a/thirdparty/spirv-cross/spirv_cross.cpp b/thirdparty/spirv-cross/spirv_cross.cpp
index 3492f0b3ed..350eff3429 100644
--- a/thirdparty/spirv-cross/spirv_cross.cpp
+++ b/thirdparty/spirv-cross/spirv_cross.cpp
@@ -82,7 +82,7 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
 	            ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
 	bool image = type.basetype == SPIRType::Image;
 	bool counter = type.basetype == SPIRType::AtomicCounter;
-	bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
+	bool buffer_reference = type.storage == StorageClassPhysicalStorageBuffer;
 	bool is_restrict;
 	if (ssbo)
@@ -171,6 +171,7 @@ bool Compiler::block_is_control_dependent(const SPIRBlock &block)
 	case OpGroupNonUniformLogicalXor:
 	case OpGroupNonUniformQuadBroadcast:
 	case OpGroupNonUniformQuadSwap:
+	case OpGroupNonUniformRotateKHR:

 	// Control barriers
 	case OpControlBarrier:
@@ -210,6 +211,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)

 	case OpCopyMemory:
 	case OpStore:
+	case OpCooperativeMatrixStoreKHR:
 	{
 		auto &type = expression_type(ops[0]);
 		if (type.storage != StorageClassFunction)
@@ -370,6 +372,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
 	}

 	case OpLoad:
+	case OpCooperativeMatrixLoadKHR:
 	case OpImageRead:
 	{
 		// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -481,7 +484,7 @@ void Compiler::register_write(uint32_t chain)
 		}
 	}

-	if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
+	if (type.storage == StorageClassPhysicalStorageBuffer || variable_storage_is_aliased(*var))
 		flush_all_aliased_variables();
 	else if (var)
 		flush_dependees(*var);
@@ -587,6 +590,7 @@ const SPIRType &Compiler::expression_type(uint32_t id) const
 bool Compiler::expression_is_lvalue(uint32_t id) const
 {
 	auto &type = expression_type(id);
+
 	switch (type.basetype)
 	{
 	case SPIRType::SampledImage:
@@ -818,6 +822,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t

 	case OpAtomicStore:
 	case OpStore:
+	case OpCooperativeMatrixStoreKHR:
 		// Invalid SPIR-V.
 		if (length < 1)
 			return false;
@@ -910,6 +915,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
 	case OpInBoundsAccessChain:
 	case OpPtrAccessChain:
 	case OpLoad:
+	case OpCooperativeMatrixLoadKHR:
 	case OpCopyObject:
 	case OpImageTexelPointer:
 	case OpAtomicLoad:
@@ -2364,6 +2370,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
 		execution.output_primitives = arg0;
 		break;

+	case ExecutionModeFPFastMathDefault:
+		execution.fp_fast_math_defaults[arg0] = arg1;
+		break;
+
 	default:
 		break;
 	}
@@ -3461,6 +3471,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 	switch (op)
 	{
 	case OpStore:
+	case OpCooperativeMatrixStoreKHR:
 	{
 		if (length < 2)
 			return false;
@@ -3581,6 +3592,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
 	}

 	case OpLoad:
+	case OpCooperativeMatrixLoadKHR:
 	{
 		if (length < 3)
 			return false;
@@ -3800,6 +3812,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
 	switch (op)
 	{
 	case OpStore:
+	case OpCooperativeMatrixStoreKHR:
 		if (length < 2)
 			return false;
 		if (args[0] == variable_id)
@@ -3810,6 +3823,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
 		break;

 	case OpLoad:
+	case OpCooperativeMatrixLoadKHR:
 		if (length < 3)
 			return false;
 		if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
@@ -4285,6 +4299,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
 		switch (op.op)
 		{
 		case OpStore:
+		case OpCooperativeMatrixStoreKHR:
 		case OpCopyMemory:
 			if (ops[0] == var)
 				return false;
@@ -4323,6 +4338,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
 		case OpCopyObject:
 		case OpLoad:
+		case OpCooperativeMatrixLoadKHR:
 			if (ops[2] == var)
 				return true;
 			break;
@@ -4350,6 +4366,39 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
 	return true;
 }

+bool Compiler::GeometryEmitDisocveryHandler::handle(spv::Op opcode, const uint32_t *, uint32_t)
+{
+	if (opcode == OpEmitVertex || opcode == OpEndPrimitive)
+	{
+		for (auto *func : function_stack)
+			func->emits_geometry = true;
+	}
+
+	return true;
+}
+
+bool Compiler::GeometryEmitDisocveryHandler::begin_function_scope(const uint32_t *stream, uint32_t)
+{
+	auto &callee = compiler.get<SPIRFunction>(stream[2]);
+	function_stack.push_back(&callee);
+	return true;
+}
+
+bool Compiler::GeometryEmitDisocveryHandler::end_function_scope([[maybe_unused]] const uint32_t *stream, uint32_t)
+{
+	assert(function_stack.back() == &compiler.get<SPIRFunction>(stream[2]));
+	function_stack.pop_back();
+
+	return true;
+}
+
+void Compiler::discover_geometry_emitters()
+{
+	GeometryEmitDisocveryHandler handler(*this);
+
+	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
+}
+
 Bitset Compiler::get_buffer_block_flags(VariableID id) const
 {
 	return ir.get_buffer_block_flags(get<SPIRVariable>(id));
@@ -4462,6 +4511,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 	switch (opcode)
 	{
 	case OpStore:
+	case OpCooperativeMatrixStoreKHR:
 		if (length < 1)
 			return false;

@@ -4478,6 +4528,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
 	case OpCopyObject:
 	case OpLoad:
+	case OpCooperativeMatrixLoadKHR:
 		if (length < 3)
 			return false;

@@ -4910,13 +4961,16 @@ void Compiler::make_constant_null(uint32_t id, uint32_t type)
 		uint32_t parent_id = ir.increase_bound_by(1);
 		make_constant_null(parent_id, constant_type.parent_type);
constant_type.parent_type); - if (!constant_type.array_size_literal.back()) - SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); - - SmallVector elements(constant_type.array.back()); - for (uint32_t i = 0; i < constant_type.array.back(); i++) + // The array size of OpConstantNull can be either literal or specialization constant. + // In the latter case, we cannot take the value as-is, as it can be changed to anything. + // Rather, we assume it to be *one* for the sake of initializer. + bool is_literal_array_size = constant_type.array_size_literal.back(); + uint32_t count = is_literal_array_size ? constant_type.array.back() : 1; + SmallVector elements(count); + for (uint32_t i = 0; i < count; i++) elements[i] = parent_id; - set(id, type, elements.data(), uint32_t(elements.size()), false); + auto &constant = set(id, type, elements.data(), uint32_t(elements.size()), false); + constant.is_null_array_specialized_length = !is_literal_array_size; } else if (!constant_type.member_types.empty()) { @@ -5177,7 +5231,7 @@ bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const { - if (type.storage == spv::StorageClassPhysicalStorageBufferEXT) + if (type.storage == spv::StorageClassPhysicalStorageBuffer) return 8; else if (type.basetype == SPIRType::Struct) { @@ -5252,6 +5306,13 @@ bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t break; } + case OpCooperativeMatrixLoadKHR: + case OpCooperativeMatrixStoreKHR: + { + // TODO: Can we meaningfully deal with this? + break; + } + default: break; } @@ -5274,6 +5335,10 @@ uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_ void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type) { + if (analyzed_type_ids.count(type.self)) + return; + analyzed_type_ids.insert(type.self); + for (auto &member : type.member_types) { auto &subtype = compiler.get(member); @@ -5407,6 +5472,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_ switch (opcode) { case OpLoad: + case OpCooperativeMatrixLoadKHR: { if (length < 3) return false; @@ -5484,6 +5550,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_ case OpStore: case OpImageWrite: case OpAtomicStore: + case OpCooperativeMatrixStoreKHR: { if (length < 1) return false; diff --git a/thirdparty/spirv-cross/spirv_cross.hpp b/thirdparty/spirv-cross/spirv_cross.hpp index e9062b485c..b65b5ac77a 100644 --- a/thirdparty/spirv-cross/spirv_cross.hpp +++ b/thirdparty/spirv-cross/spirv_cross.hpp @@ -1054,6 +1054,7 @@ protected: std::unordered_set non_block_types; std::unordered_map physical_block_type_meta; std::unordered_map access_chain_to_physical_block; + std::unordered_set analyzed_type_ids; void mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length); PhysicalBlockMeta *find_block_meta(uint32_t id) const; @@ -1072,6 +1073,22 @@ protected: bool single_function); bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + struct GeometryEmitDisocveryHandler : OpcodeHandler + { + explicit GeometryEmitDisocveryHandler(Compiler &compiler_) + : compiler(compiler_) + { + } + Compiler &compiler; + + bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *, uint32_t) override; + bool end_function_scope(const 
uint32_t *, uint32_t) override; + SmallVector function_stack; + }; + + void discover_geometry_emitters(); + // Finds all resources that are written to from inside the critical section, if present. // The critical section is delimited by OpBeginInvocationInterlockEXT and // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written diff --git a/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp index b05afeb3f5..760b8037d4 100644 --- a/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) dec.fp_rounding_mode = static_cast(argument); break; + case DecorationFPFastMathMode: + dec.fp_fast_math_mode = static_cast(argument); + break; + default: break; } @@ -643,6 +647,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const return dec.index; case DecorationFPRoundingMode: return dec.fp_rounding_mode; + case DecorationFPFastMathMode: + return dec.fp_fast_math_mode; default: return 1; } @@ -730,6 +736,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration) dec.fp_rounding_mode = FPRoundingModeMax; break; + case DecorationFPFastMathMode: + dec.fp_fast_math_mode = FPFastMathModeMaskNone; + break; + case DecorationHlslCounterBufferGOOGLE: { auto &counter = meta[id].hlsl_magic_counter_buffer; @@ -1050,16 +1060,21 @@ void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_ uint32_t parent_id = increase_bound_by(1); make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set); - if (!constant_type.array_size_literal.back()) - SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal."); + // The array size of OpConstantNull can be either literal or specialization constant. + // In the latter case, we cannot take the value as-is, as it can be changed to anything. + // Rather, we assume it to be *one* for the sake of initializer. + bool is_literal_array_size = constant_type.array_size_literal.back(); + uint32_t count = is_literal_array_size ? constant_type.array.back() : 1; - SmallVector elements(constant_type.array.back()); - for (uint32_t i = 0; i < constant_type.array.back(); i++) + SmallVector elements(count); + for (uint32_t i = 0; i < count; i++) elements[i] = parent_id; if (add_to_typed_id_set) add_typed_id(TypeConstant, id); - variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id; + auto& constant = variant_set(ids[id], type, elements.data(), uint32_t(elements.size()), false); + constant.self = id; + constant.is_null_array_specialized_length = !is_literal_array_size; } else if (!constant_type.member_types.empty()) { diff --git a/thirdparty/spirv-cross/spirv_cross_util.cpp b/thirdparty/spirv-cross/spirv_cross_util.cpp deleted file mode 100644 index 7cff010d1c..0000000000 --- a/thirdparty/spirv-cross/spirv_cross_util.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright 2015-2021 Arm Limited - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * At your option, you may choose to accept this material under either: - * 1. The Apache License, Version 2.0, found at , or - * 2. The MIT License, found at . - */ - -#include "spirv_cross_util.hpp" -#include "spirv_common.hpp" - -using namespace spv; -using namespace SPIRV_CROSS_NAMESPACE; - -namespace spirv_cross_util -{ -void rename_interface_variable(Compiler &compiler, const SmallVector &resources, uint32_t location, - const std::string &name) -{ - for (auto &v : resources) - { - if (!compiler.has_decoration(v.id, spv::DecorationLocation)) - continue; - - auto loc = compiler.get_decoration(v.id, spv::DecorationLocation); - if (loc != location) - continue; - - auto &type = compiler.get_type(v.base_type_id); - - // This is more of a friendly variant. If we need to rename interface variables, we might have to rename - // structs as well and make sure all the names match up. - if (type.basetype == SPIRType::Struct) - { - compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", location)); - for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) - compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i)); - } - - compiler.set_name(v.id, name); - } -} - -void inherit_combined_sampler_bindings(Compiler &compiler) -{ - auto &samplers = compiler.get_combined_image_samplers(); - for (auto &s : samplers) - { - if (compiler.has_decoration(s.image_id, spv::DecorationDescriptorSet)) - { - uint32_t set = compiler.get_decoration(s.image_id, spv::DecorationDescriptorSet); - compiler.set_decoration(s.combined_id, spv::DecorationDescriptorSet, set); - } - - if (compiler.has_decoration(s.image_id, spv::DecorationBinding)) - { - uint32_t binding = compiler.get_decoration(s.image_id, spv::DecorationBinding); - compiler.set_decoration(s.combined_id, spv::DecorationBinding, binding); - } - } -} -} // namespace spirv_cross_util diff --git a/thirdparty/spirv-cross/spirv_cross_util.hpp b/thirdparty/spirv-cross/spirv_cross_util.hpp deleted file mode 100644 index e6e3fcdb63..0000000000 --- a/thirdparty/spirv-cross/spirv_cross_util.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright 2015-2021 Arm Limited - * SPDX-License-Identifier: Apache-2.0 OR MIT - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* - * At your option, you may choose to accept this material under either: - * 1. The Apache License, Version 2.0, found at , or - * 2. The MIT License, found at . 
- */ - -#ifndef SPIRV_CROSS_UTIL_HPP -#define SPIRV_CROSS_UTIL_HPP - -#include "spirv_cross.hpp" - -namespace spirv_cross_util -{ -void rename_interface_variable(SPIRV_CROSS_NAMESPACE::Compiler &compiler, - const SPIRV_CROSS_NAMESPACE::SmallVector &resources, - uint32_t location, const std::string &name); -void inherit_combined_sampler_bindings(SPIRV_CROSS_NAMESPACE::Compiler &compiler); -} // namespace spirv_cross_util - -#endif diff --git a/thirdparty/spirv-cross/spirv_glsl.cpp b/thirdparty/spirv-cross/spirv_glsl.cpp index 6c1d5208b9..a01cef4449 100644 --- a/thirdparty/spirv-cross/spirv_glsl.cpp +++ b/thirdparty/spirv-cross/spirv_glsl.cpp @@ -545,7 +545,7 @@ void CompilerGLSL::find_static_extensions() if (options.separate_shader_objects && !options.es && options.version < 410) require_extension_internal("GL_ARB_separate_shader_objects"); - if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) { if (!options.vulkan_semantics) SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL."); @@ -557,7 +557,7 @@ void CompilerGLSL::find_static_extensions() } else if (ir.addressing_model != AddressingModelLogical) { - SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported."); + SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64 addressing models are supported."); } // Check for nonuniform qualifier and passthrough. @@ -631,6 +631,12 @@ void CompilerGLSL::find_static_extensions() require_extension_internal("GL_OVR_multiview2"); } + if (execution.flags.get(ExecutionModeQuadDerivativesKHR) || + (execution.flags.get(ExecutionModeRequireFullQuadsKHR) && get_execution_model() == ExecutionModelFragment)) + { + require_extension_internal("GL_EXT_shader_quad_control"); + } + // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. for (auto &ext : ir.declared_extensions) if (ext == "SPV_NV_fragment_shader_barycentric") @@ -681,6 +687,8 @@ string CompilerGLSL::compile() backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; backend.support_precise_qualifier = (!options.es && options.version >= 400) || (options.es && options.version >= 320); + backend.constant_null_initializer = "{ }"; + backend.requires_matching_array_initializer = true; if (is_legacy_es()) backend.support_case_fallthrough = false; @@ -700,7 +708,7 @@ string CompilerGLSL::compile() // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. 
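As a rough aside on the null-initializer plumbing above (the "{ }" backend default plus the make_constant_null fallback when an array length is a specialization constant), the following self-contained C++ sketch restates the decision with invented stand-in types rather than the real SPIRType/SPIRConstant:

#include <cstdint>
#include <vector>

struct ArrayInfo {
    uint32_t length = 0;            // meaningful only when length_is_literal is true
    bool length_is_literal = false; // false: the length comes from a specialization constant
};

struct NullArrayConstant {
    std::vector<uint32_t> elements;              // ids of per-element null constants
    bool null_array_specialized_length = false;  // later emitted as "{ }" with GL_EXT_null_initializer
};

NullArrayConstant make_null_array(const ArrayInfo &info, uint32_t element_null_id) {
    NullArrayConstant c;
    // A spec-constant length can change at pipeline creation time, so the initializer
    // cannot enumerate elements; assume one element and flag the constant instead.
    uint32_t count = info.length_is_literal ? info.length : 1;
    c.elements.assign(count, element_null_id);
    c.null_array_specialized_length = !info.length_is_literal;
    return c;
}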
- if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) analyze_non_block_pointer_types(); uint32_t pass_count = 0; @@ -1191,6 +1199,9 @@ void CompilerGLSL::emit_header() else if (!options.es && execution.flags.get(ExecutionModeDepthLess)) statement("layout(depth_less) out float gl_FragDepth;"); + if (execution.flags.get(ExecutionModeRequireFullQuadsKHR)) + statement("layout(full_quads) in;"); + break; default: @@ -1201,6 +1212,9 @@ void CompilerGLSL::emit_header() if (cap == CapabilityRayTraversalPrimitiveCullingKHR) statement("layout(primitive_culling);"); + if (execution.flags.get(ExecutionModeQuadDerivativesKHR)) + statement("layout(quad_derivatives) in;"); + if (!inputs.empty()) statement("layout(", merge(inputs), ") in;"); if (!outputs.empty()) @@ -1515,9 +1529,12 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack case SPIRType::Half: case SPIRType::Short: case SPIRType::UShort: + case SPIRType::BFloat16: return 2; case SPIRType::SByte: case SPIRType::UByte: + case SPIRType::FloatE4M3: + case SPIRType::FloatE5M2: return 1; default: @@ -1528,14 +1545,14 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) { - // If using PhysicalStorageBufferEXT storage class, this is a pointer, + // If using PhysicalStorageBuffer storage class, this is a pointer, // and is 64-bit. if (is_physical_pointer(type)) { if (!type.pointer) - SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); + SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers."); - if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) { if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type)) return 16; @@ -1543,7 +1560,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits return 8; } else - SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); + SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer."); } else if (is_array(type)) { @@ -1651,17 +1668,17 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) { - // If using PhysicalStorageBufferEXT storage class, this is a pointer, + // If using PhysicalStorageBuffer storage class, this is a pointer, // and is 64-bit. 
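The new scalar types slot into the packing rules touched in this hunk; as a condensed restatement (the enum and helper below are stand-ins for SPIRType::BaseType and type_to_packed_base_size, not the actual interfaces):

#include <cstdint>
#include <stdexcept>

enum class BaseType { Short, UShort, Half, BFloat16, SByte, UByte, FloatE4M3, FloatE5M2, Float, UInt64 };

uint32_t packed_scalar_size(BaseType t) {
    switch (t) {
    case BaseType::Short:
    case BaseType::UShort:
    case BaseType::Half:
    case BaseType::BFloat16:   // bfloat16 packs like any other 16-bit scalar
        return 2;
    case BaseType::SByte:
    case BaseType::UByte:
    case BaseType::FloatE4M3:  // the 8-bit float formats pack like bytes
    case BaseType::FloatE5M2:
        return 1;
    case BaseType::Float:
        return 4;
    case BaseType::UInt64:
        return 8;
    }
    throw std::logic_error("unhandled base type");
}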
if (is_physical_pointer(type)) { if (!type.pointer) - SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); + SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers."); - if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) return 8; else - SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); + SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer."); } else if (is_array(type)) { @@ -2841,7 +2858,7 @@ void CompilerGLSL::emit_uniform(const SPIRVariable &var) statement(layout_for_variable(var), variable_decl(var), ";"); } -string CompilerGLSL::constant_value_macro_name(uint32_t id) +string CompilerGLSL::constant_value_macro_name(uint32_t id) const { return join("SPIRV_CROSS_CONSTANT_ID_", id); } @@ -3624,6 +3641,36 @@ void CompilerGLSL::emit_resources() bool emitted = false; + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) + { + // Output buffer reference block forward declarations. + ir.for_each_typed_id([&](uint32_t id, SPIRType &type) + { + if (is_physical_pointer(type)) + { + bool emit_type = true; + if (!is_physical_pointer_to_buffer_block(type)) + { + // Only forward-declare if we intend to emit it in the non_block_pointer types. + // Otherwise, these are just "benign" pointer types that exist as a result of access chains. + emit_type = std::find(physical_storage_non_block_pointer_types.begin(), + physical_storage_non_block_pointer_types.end(), + id) != physical_storage_non_block_pointer_types.end(); + } + + if (emit_type) + { + emit_buffer_reference_block(id, true); + emitted = true; + } + } + }); + } + + if (emitted) + statement(""); + emitted = false; + // If emitted Vulkan GLSL, // emit specialization constants as actual floats, // spec op expressions will redirect to the constant name. @@ -3733,30 +3780,10 @@ void CompilerGLSL::emit_resources() emitted = false; - if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64) { // Output buffer reference blocks. - // Do this in two stages, one with forward declaration, - // and one without. Buffer reference blocks can reference themselves - // to support things like linked lists. - ir.for_each_typed_id([&](uint32_t id, SPIRType &type) { - if (is_physical_pointer(type)) - { - bool emit_type = true; - if (!is_physical_pointer_to_buffer_block(type)) - { - // Only forward-declare if we intend to emit it in the non_block_pointer types. - // Otherwise, these are just "benign" pointer types that exist as a result of access chains. - emit_type = std::find(physical_storage_non_block_pointer_types.begin(), - physical_storage_non_block_pointer_types.end(), - id) != physical_storage_non_block_pointer_types.end(); - } - - if (emit_type) - emit_buffer_reference_block(id, true); - } - }); - + // Buffer reference blocks can reference themselves to support things like linked lists. for (auto type : physical_storage_non_block_pointer_types) emit_buffer_reference_block(type, false); @@ -4955,12 +4982,16 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) // Subclasses may override to modify the return value. string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) { + // BDA expects pointers through function interface. 
+ if (is_physical_pointer(expression_type(id))) + return to_pointer_expression(id); + // Make sure that we use the name of the original variable, and not the parameter alias. uint32_t name_id = id; auto *var = maybe_get(id); if (var && var->basevariable) name_id = var->basevariable; - return to_expression(name_id); + return to_unpacked_expression(name_id); } void CompilerGLSL::force_temporary_and_recompile(uint32_t id) @@ -5391,6 +5422,15 @@ string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) return expr; } +string CompilerGLSL::to_atomic_ptr_expression(uint32_t id) +{ + string expr = to_non_uniform_aware_expression(id); + // If we have naked pointer to POD, we need to dereference to get the proper ".value" resolve. + if (should_dereference(id)) + expr = dereference_expression(expression_type(id), expr); + return expr; +} + string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) { auto itr = invalid_expressions.find(id); @@ -5898,6 +5938,35 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, { return backend.null_pointer_literal; } + else if (c.is_null_array_specialized_length && backend.requires_matching_array_initializer) + { + require_extension_internal("GL_EXT_null_initializer"); + return backend.constant_null_initializer; + } + else if (c.replicated && type.op != spv::OpTypeArray) + { + if (type.op == spv::OpTypeMatrix) + { + uint32_t num_elements = type.columns; + // GLSL does not allow the replication constructor for matrices + // mat4(vec4(0.0)) needs to be manually expanded to mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)); + std::string res; + res += type_to_glsl(type); + res += "("; + for (uint32_t i = 0; i < num_elements; i++) + { + res += to_expression(c.subconstants[0]); + if (i < num_elements - 1) + res += ", "; + } + res += ")"; + return res; + } + else + { + return join(type_to_glsl(type), "(", to_expression(c.subconstants[0]), ")"); + } + } else if (!c.subconstants.empty()) { // Handles Arrays and structures. @@ -5947,8 +6016,16 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, } uint32_t subconstant_index = 0; - for (auto &elem : c.subconstants) + size_t num_elements = c.subconstants.size(); + if (c.replicated) { + if (type.array.size() != 1) + SPIRV_CROSS_THROW("Multidimensional arrays not yet supported as replicated constans"); + num_elements = type.array[0]; + } + for (size_t i = 0; i < num_elements; i++) + { + auto &elem = c.subconstants[c.replicated ? 0 : i]; if (auto *op = maybe_get(elem)) { res += constant_op_expression(*op); @@ -5979,7 +6056,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, } } - if (&elem != &c.subconstants.back()) + if (i != num_elements - 1) res += ", "; subconstant_index++; @@ -6003,7 +6080,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, else return join(type_to_glsl(type), "(0)"); } - else if (c.columns() == 1) + else if (c.columns() == 1 && type.op != spv::OpTypeCooperativeMatrixKHR) { auto res = constant_expression_vector(c, 0); @@ -6053,17 +6130,44 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c, #pragma warning(disable : 4996) #endif +string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_floate4m3(col, row); + + // There is no infinity in e4m3. 
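The replicated-constant path above expands matrix replication by hand because GLSL has no single-argument matrix constructor that broadcasts one column; a minimal stand-alone sketch of that expansion (names invented):

#include <cstdint>
#include <string>

std::string replicate_matrix_constructor(const std::string &matrix_type,
                                         const std::string &column_expr,
                                         uint32_t columns) {
    std::string res = matrix_type + "(";
    for (uint32_t i = 0; i < columns; i++) {
        res += column_expr;
        if (i + 1 < columns)
            res += ", ";
    }
    res += ")";
    return res;
}

// replicate_matrix_constructor("mat4", "vec4(0.0)", 4)
// -> "mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0))"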
+ if (std::isnan(float_value)) + { + SPIRType type { OpTypeFloat }; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + } + else + { + SPIRType type { OpTypeFloat }; + type.basetype = SPIRType::FloatE4M3; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", format_float(float_value), ")"); + } + + return res; +} + string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; - float float_value = c.scalar_f16(col, row); + bool is_bfloat8 = get(c.constant_type).basetype == SPIRType::FloatE5M2; + float float_value = is_bfloat8 ? c.scalar_bf8(col, row) : c.scalar_f16(col, row); // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots // of complicated workarounds, just value-cast to the half type always. if (std::isnan(float_value) || std::isinf(float_value)) { SPIRType type { OpTypeFloat }; - type.basetype = SPIRType::Half; + type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; @@ -6079,7 +6183,7 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, else { SPIRType type { OpTypeFloat }; - type.basetype = SPIRType::Half; + type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half; type.vecsize = 1; type.columns = 1; res = join(type_to_glsl(type), "(", format_float(float_value), ")"); @@ -6091,7 +6195,9 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) { string res; - float float_value = c.scalar_f32(col, row); + + bool is_bfloat16 = get(c.constant_type).basetype == SPIRType::BFloat16; + float float_value = is_bfloat16 ? 
c.scalar_bf16(col, row) : c.scalar_f32(col, row); if (std::isnan(float_value) || std::isinf(float_value)) { @@ -6155,6 +6261,9 @@ string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col res += "f"; } + if (is_bfloat16) + res = join("bfloat16_t(", res, ")"); + return res; } @@ -6311,6 +6420,29 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t switch (type.basetype) { + case SPIRType::FloatE4M3: + if (splat || swizzle_splat) + { + res += convert_floate4m3_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_floate4m3_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::FloatE5M2: case SPIRType::Half: if (splat || swizzle_splat) { @@ -6333,6 +6465,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t } break; + case SPIRType::BFloat16: case SPIRType::Float: if (splat || swizzle_splat) { @@ -6988,9 +7121,12 @@ void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, require_extension_internal("GL_EXT_shader_atomic_float"); } + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + require_extension_internal("GL_EXT_shader_atomic_int64"); + forced_temporaries.insert(result_id); emit_op(result_type, result_id, - join(op, "(", to_non_uniform_aware_expression(op0), ", ", + join(op, "(", to_atomic_ptr_expression(op0), ", ", to_unpacked_expression(op1), ")"), false); flush_all_atomic_capable_variables(); } @@ -9358,6 +9494,10 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); break; + case OpGroupNonUniformRotateKHR: + require_extension_internal("GL_KHR_shader_subgroup_rotate"); + break; + case OpGroupNonUniformAll: case OpGroupNonUniformAny: case OpGroupNonUniformAllEqual: @@ -9429,6 +9569,13 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) require_extension_internal("GL_KHR_shader_subgroup_quad"); break; + case OpGroupNonUniformQuadAllKHR: + case OpGroupNonUniformQuadAnyKHR: + // Require both extensions to be enabled. + require_extension_internal("GL_KHR_shader_subgroup_vote"); + require_extension_internal("GL_EXT_shader_quad_control"); + break; + default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } @@ -9436,9 +9583,13 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto scope = static_cast(evaluate_constant_u32(ops[2])); - if (scope != ScopeSubgroup) - SPIRV_CROSS_THROW("Only subgroup scope is supported."); + // These quad ops do not have a scope parameter. 
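The atomic hunks route pointer operands through the new to_atomic_ptr_expression() and pull in GL_EXT_shader_atomic_int64 for 64-bit operands; a condensed sketch of those two decisions, using stand-in types rather than the compiler's own:

#include <set>
#include <string>

struct Operand {
    std::string expr;
    bool is_64bit_integer = false;
    bool is_bare_physical_pointer = false; // buffer_reference pointer to a plain scalar
};

void require_atomic_extensions(const Operand &op, std::set<std::string> &extensions) {
    if (op.is_64bit_integer)
        extensions.insert("GL_EXT_shader_atomic_int64");
}

std::string atomic_ptr_expression(const Operand &op) {
    // Stand-in for dereference_expression(): a bare BDA pointer must be dereferenced,
    // which for a non-block reference type resolves to its synthesized ".value" member.
    return op.is_bare_physical_pointer ? op.expr + ".value" : op.expr;
}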
+ if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR) + { + auto scope = static_cast(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + } switch (op) { @@ -9504,6 +9655,13 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i) emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); break; + case OpGroupNonUniformRotateKHR: + if (i.length > 5) + emit_trinary_func_op(result_type, id, ops[3], ops[4], ops[5], "subgroupClusteredRotate"); + else + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupRotate"); + break; + case OpGroupNonUniformAll: emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); break; @@ -9591,6 +9749,14 @@ case OpGroupNonUniform##op: \ break; } + case OpGroupNonUniformQuadAllKHR: + emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAll"); + break; + + case OpGroupNonUniformQuadAnyKHR: + emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAny"); + break; + default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } @@ -9706,6 +9872,30 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i return "packUint4x16"; else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) return "unpackUint4x16"; + else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::UShort) + return "uintBitsToBFloat16EXT"; + else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::Short) + return "intBitsToBFloat16EXT"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::BFloat16) + return "bfloat16BitsToUintEXT"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::BFloat16) + return "bfloat16BitsToIntEXT"; + else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::UByte) + return "uintBitsToFloate4m3EXT"; + else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::SByte) + return "intBitsToFloate4m3EXT"; + else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE4M3) + return "floate4m3BitsToUintEXT"; + else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE4M3) + return "floate4m3BitsToIntEXT"; + else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::UByte) + return "uintBitsToFloate5m2EXT"; + else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::SByte) + return "intBitsToFloate5m2EXT"; + else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE5M2) + return "floate5m2BitsToUintEXT"; + else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE5M2) + return "floate5m2BitsToIntEXT"; return ""; } @@ -9824,7 +10014,17 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) case BuiltInInvocationId: return "gl_InvocationID"; case BuiltInLayer: + { + auto model = get_execution_model(); + if (model == ExecutionModelVertex || model == ExecutionModelTessellationEvaluation) + { + if (options.es) + require_extension_internal("GL_NV_viewport_array2"); + else + require_extension_internal("GL_ARB_shader_viewport_layer_array"); + } return "gl_Layer"; + } case BuiltInViewportIndex: return "gl_ViewportIndex"; case BuiltInTessLevelOuter: @@ -10228,7 +10428,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if 
(!is_ptr_chain) mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT; access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); - check_physical_type_cast(expr, type, physical_type); + if (check_physical_type_cast(expr, type, physical_type)) + physical_type = 0; }; for (uint32_t i = 0; i < count; i++) @@ -10572,7 +10773,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice type = &get(type_id); } // Vector -> Scalar - else if (type->vecsize > 1) + else if (type->op == OpTypeCooperativeMatrixKHR || type->vecsize > 1) { string deferred_index; if (row_major_matrix_needs_conversion) @@ -10634,9 +10835,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice if (is_literal) { - bool out_of_bounds = (index >= type->vecsize); + bool out_of_bounds = index >= type->vecsize && type->op != OpTypeCooperativeMatrixKHR; - if (!is_packed && !row_major_matrix_needs_conversion) + if (!is_packed && !row_major_matrix_needs_conversion && type->op != OpTypeCooperativeMatrixKHR) { expr += "."; expr += index_to_swizzle(out_of_bounds ? 0 : index); @@ -10736,8 +10937,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice return expr; } -void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +bool CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) { + return false; } bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) @@ -11239,7 +11441,7 @@ bool CompilerGLSL::should_dereference(uint32_t id) { const auto &type = expression_type(id); // Non-pointer expressions don't need to be dereferenced. - if (!type.pointer) + if (!is_pointer(type)) return false; // Handles shouldn't be dereferenced either. @@ -11247,8 +11449,9 @@ bool CompilerGLSL::should_dereference(uint32_t id) return false; // If id is a variable but not a phi variable, we should not dereference it. + // BDA passed around as parameters are always pointers. if (auto *var = maybe_get(id)) - return var->phi_variable; + return (var->parameter && is_physical_pointer(type)) || var->phi_variable; if (auto *expr = maybe_get(id)) { @@ -11281,6 +11484,16 @@ bool CompilerGLSL::should_dereference(uint32_t id) return true; } +bool CompilerGLSL::should_dereference_caller_param(uint32_t id) +{ + const auto &type = expression_type(id); + // BDA is always passed around as pointers. + if (is_physical_pointer(type)) + return false; + + return should_dereference(id); +} + bool CompilerGLSL::should_forward(uint32_t id) const { // If id is a variable we will try to forward it regardless of force_temporary check below @@ -11575,7 +11788,8 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32 // Can only merge swizzles for vectors. 
auto &type = get(return_type); - bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; + bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1 && + type.op != spv::OpTypeCooperativeMatrixKHR; bool swizzle_optimization = false; for (uint32_t i = 0; i < length; i++) @@ -12132,6 +12346,33 @@ CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Ins return {}; } +static pair split_coopmat_pointer(const string &expr) +{ + auto ptr_expr = expr; + string index_expr; + + if (ptr_expr.back() != ']') + SPIRV_CROSS_THROW("Access chain for coopmat must be indexed into an array."); + + // Strip the access chain. + ptr_expr.pop_back(); + uint32_t counter = 1; + while (counter && !ptr_expr.empty()) + { + if (ptr_expr.back() == ']') + counter++; + else if (ptr_expr.back() == '[') + counter--; + ptr_expr.pop_back(); + } + + if (ptr_expr.empty()) + SPIRV_CROSS_THROW("Invalid pointer expression for coopmat."); + + index_expr = expr.substr(ptr_expr.size() + 1, expr.size() - (ptr_expr.size() + 1) - 1); + return { std::move(ptr_expr), std::move(index_expr) }; +} + void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto ops = stream(instruction); @@ -12675,6 +12916,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (composite_type_is_complex) allow_base_expression = false; + if (composite_type.op == spv::OpTypeCooperativeMatrixKHR) + allow_base_expression = false; + // Packed expressions or physical ID mapped expressions cannot be split up. if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) || has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID)) @@ -13618,13 +13862,42 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpCooperativeMatrixConvertNV: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("CooperativeMatrixConvertNV requires vulkan semantics."); + require_extension_internal("GL_NV_cooperative_matrix2"); + // fallthrough case OpFConvert: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; - auto func = type_to_glsl_constructor(get(result_type)); - emit_unary_func_op(result_type, id, ops[2], func.c_str()); + auto &type = get(result_type); + + if (type.op == OpTypeCooperativeMatrixKHR && opcode == OpFConvert) + { + auto &expr_type = expression_type(ops[2]); + if (get(type.cooperative.use_id).scalar() != + get(expr_type.cooperative.use_id).scalar()) + { + // Somewhat questionable with spec constant uses. + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("NV_cooperative_matrix2 requires vulkan semantics."); + require_extension_internal("GL_NV_cooperative_matrix2"); + } + } + + if ((type.basetype == SPIRType::FloatE4M3 || type.basetype == SPIRType::FloatE5M2) && + has_decoration(id, spv::DecorationSaturatedToLargestFloat8NormalConversionEXT)) + { + emit_uninitialized_temporary_expression(result_type, id); + statement("saturatedConvertEXT(", to_expression(id), ", ", to_unpacked_expression(ops[2]), ");"); + } + else + { + auto func = type_to_glsl_constructor(type); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + } break; } @@ -13843,8 +14116,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) const char *increment = unsigned_type ? 
"0u" : "0"; emit_op(ops[0], ops[1], join(op, "(", - to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false); flush_all_atomic_capable_variables(); + + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + require_extension_internal("GL_EXT_shader_atomic_int64"); break; } @@ -13856,8 +14132,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // Ignore semantics for now, probably only relevant to CL. uint32_t val = ops[3]; const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; - statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");"); + statement(op, "(", to_atomic_ptr_expression(ptr), ", ", to_expression(val), ");"); flush_all_atomic_capable_variables(); + + auto &type = expression_type(ptr); + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + require_extension_internal("GL_EXT_shader_atomic_int64"); break; } @@ -13892,7 +14172,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) increment = "-1"; emit_op(ops[0], ops[1], - join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false); + + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + require_extension_internal("GL_EXT_shader_atomic_int64"); } flush_all_atomic_capable_variables(); @@ -13911,9 +14194,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; forced_temporaries.insert(ops[1]); - auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + auto expr = join(op, "(", to_atomic_ptr_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); flush_all_atomic_capable_variables(); + + auto &type = get(ops[0]); + if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64) + require_extension_internal("GL_EXT_shader_atomic_int64"); break; } @@ -14717,6 +15004,20 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpExtInstWithForwardRefsKHR: + { + uint32_t extension_set = ops[2]; + auto ext = get(extension_set).ext; + if (ext != SPIRExtension::SPV_debug_info && + ext != SPIRExtension::NonSemanticShaderDebugInfo && + ext != SPIRExtension::NonSemanticGeneric) + { + SPIRV_CROSS_THROW("Unexpected use of ExtInstWithForwardRefsKHR."); + } + + break; + } + case OpExtInst: { uint32_t extension_set = ops[2]; @@ -14757,7 +15058,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n"); require_extension_internal("GL_EXT_debug_printf"); auto &format_string = get(ops[4]).str; - string expr = join("debugPrintfEXT(\"", format_string, "\""); + string expr = join(backend.printf_function, "(\"", format_string, "\""); for (uint32_t i = 5; i < length; i++) { expr += ", "; @@ -14956,6 +15257,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpGroupNonUniformLogicalXor: case OpGroupNonUniformQuadSwap: case OpGroupNonUniformQuadBroadcast: + case OpGroupNonUniformQuadAllKHR: + case OpGroupNonUniformQuadAnyKHR: + case OpGroupNonUniformRotateKHR: emit_subgroup_op(instruction); break; @@ -15175,8 +15479,8 
@@ void CompilerGLSL::emit_instruction(const Instruction &instruction) case OpConvertUToPtr: { auto &type = get(ops[0]); - if (type.storage != StorageClassPhysicalStorageBufferEXT) - SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); + if (type.storage != StorageClassPhysicalStorageBuffer) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertUToPtr."); auto &in_type = expression_type(ops[2]); if (in_type.vecsize == 2) @@ -15191,8 +15495,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) { auto &type = get(ops[0]); auto &ptr_type = expression_type(ops[2]); - if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) - SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); + if (ptr_type.storage != StorageClassPhysicalStorageBuffer) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertPtrToU."); if (type.vecsize == 2) require_extension_internal("GL_EXT_buffer_reference_uvec2"); @@ -15291,6 +15595,169 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) break; } + case OpCooperativeMatrixLengthKHR: + { + // Need to synthesize a dummy temporary, since the SPIR-V opcode is based on the type. + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + set( + id, join(type_to_glsl(get(result_type)), + "(", type_to_glsl(get(ops[2])), "(0).length())"), + result_type, true); + break; + } + + case OpCooperativeMatrixLoadKHR: + { + // Spec contradicts itself if stride is optional or not. + if (length < 5) + SPIRV_CROSS_THROW("Stride is not provided."); + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_uninitialized_temporary_expression(result_type, id); + + auto expr = to_expression(ops[2]); + pair split_expr; + if (!is_forcing_recompilation()) + split_expr = split_coopmat_pointer(expr); + + string layout_expr; + if (const auto *layout = maybe_get(ops[3])) + { + if (!layout->specialization) + { + if (layout->scalar() == spv::CooperativeMatrixLayoutColumnMajorKHR) + layout_expr = "gl_CooperativeMatrixLayoutColumnMajor"; + else + layout_expr = "gl_CooperativeMatrixLayoutRowMajor"; + } + } + + if (layout_expr.empty()) + layout_expr = join("int(", to_expression(ops[3]), ")"); + + statement("coopMatLoad(", + to_expression(id), ", ", + split_expr.first, ", ", + split_expr.second, ", ", + to_expression(ops[4]), ", ", + layout_expr, ");"); + + register_read(id, ops[2], false); + break; + } + + case OpCooperativeMatrixStoreKHR: + { + // Spec contradicts itself if stride is optional or not. + if (length < 4) + SPIRV_CROSS_THROW("Stride is not provided."); + + // SPIR-V and GLSL don't agree how to pass the expression. + // In SPIR-V it's a pointer, but in GLSL it's reference to array + index. 
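That mismatch is exactly what split_coopmat_pointer() papers over: the SPIR-V pointer expression ends in an array index, while coopMatLoad/coopMatStore want the array and the element index as separate arguments. A self-contained restatement of the splitting, with an assumed sample input and simplified error handling:

#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>

std::pair<std::string, std::string> split_trailing_index(const std::string &expr) {
    if (expr.empty() || expr.back() != ']')
        throw std::runtime_error("expression must end with an array index");

    size_t depth = 0;
    size_t open = std::string::npos;
    for (size_t i = expr.size(); i-- > 0;) {
        if (expr[i] == ']')
            depth++;
        else if (expr[i] == '[' && --depth == 0) {
            open = i;
            break;
        }
    }
    if (open == std::string::npos || open == 0)
        throw std::runtime_error("no array expression before the index");

    return { expr.substr(0, open), expr.substr(open + 1, expr.size() - open - 2) };
}

int main() {
    auto split = split_trailing_index("ssbo.data[gl_WorkGroupID.x * 16u]");
    std::cout << split.first << " | " << split.second << "\n"; // ssbo.data | gl_WorkGroupID.x * 16u
}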
+ + auto expr = to_expression(ops[0]); + pair split_expr; + if (!is_forcing_recompilation()) + split_expr = split_coopmat_pointer(expr); + + string layout_expr; + if (const auto *layout = maybe_get(ops[2])) + { + if (!layout->specialization) + { + if (layout->scalar() == spv::CooperativeMatrixLayoutColumnMajorKHR) + layout_expr = "gl_CooperativeMatrixLayoutColumnMajor"; + else + layout_expr = "gl_CooperativeMatrixLayoutRowMajor"; + } + } + + if (layout_expr.empty()) + layout_expr = join("int(", to_expression(ops[2]), ")"); + + statement("coopMatStore(", + to_expression(ops[1]), ", ", + split_expr.first, ", ", + split_expr.second, ", ", + to_expression(ops[3]), ", ", + layout_expr, ");"); + + // TODO: Do we care about memory operands? + + register_write(ops[0]); + break; + } + + case OpCooperativeMatrixMulAddKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t A = ops[2]; + uint32_t B = ops[3]; + uint32_t C = ops[4]; + bool forward = should_forward(A) && should_forward(B) && should_forward(C); + emit_op(result_type, id, + join("coopMatMulAdd(", + to_unpacked_expression(A), ", ", + to_unpacked_expression(B), ", ", + to_unpacked_expression(C), ", ", + (length >= 6 ? ops[5] : 0), + ")"), + forward); + + inherit_expression_dependencies(id, A); + inherit_expression_dependencies(id, B); + inherit_expression_dependencies(id, C); + break; + } + + case OpCompositeConstructReplicateEXT: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto &type = get(result_type); + auto value_to_replicate = to_expression(ops[2]); + std::string rhs; + // Matrices don't have a replicating constructor for vectors. Need to manually replicate + if (type.op == spv::OpTypeMatrix || type.op == spv::OpTypeArray) + { + if (type.op == spv::OpTypeArray && type.array.size() != 1) + { + SPIRV_CROSS_THROW( + "Multi-dimensional arrays currently not supported for OpCompositeConstructReplicateEXT"); + } + uint32_t num_elements = type.op == spv::OpTypeMatrix ? type.columns : type.array[0]; + if (backend.use_initializer_list && type.op == spv::OpTypeArray) + { + rhs += "{"; + } + else + { + rhs += type_to_glsl_constructor(type); + rhs += "("; + } + for (uint32_t i = 0; i < num_elements; i++) + { + rhs += value_to_replicate; + if (i < num_elements - 1) + rhs += ", "; + } + if (backend.use_initializer_list && type.op == spv::OpTypeArray) + rhs += "}"; + else + rhs += ")"; + } + else + { + rhs = join(type_to_glsl(type), "(", to_expression(ops[2]), ")"); + } + emit_op(result_type, id, rhs, true); + break; + } + default: statement("// unimplemented op ", instruction.op); break; @@ -15689,7 +16156,10 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) auto &type = expression_type(arg.id); const char *direction = ""; - if (type.pointer) + if (is_pointer(type) && + (type.storage == StorageClassFunction || + type.storage == StorageClassPrivate || + type.storage == StorageClassOutput)) { // If we're passing around block types to function, we really mean reference in a pointer sense, // but DXC does not like inout for mesh blocks, so workaround that. 
out is technically not correct, @@ -15763,13 +16233,24 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable) else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); } - else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + else if (variable.initializer) { - uint32_t expr = variable.initializer; - if (ir.ids[expr].get_type() != TypeUndef) - res += join(" = ", to_initializer_expression(variable)); - else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) - res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + if (!variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + { + uint32_t expr = variable.initializer; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_initializer_expression(variable)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + else + { + // Workgroup memory requires special handling. First, it can only be Null-Initialized. + // GLSL will handle this with null initializer, while others require more work after the decl + require_extension_internal("GL_EXT_null_initializer"); + if (!backend.constant_null_initializer.empty()) + res += join(" = ", backend.constant_null_initializer); + } } return res; @@ -15849,7 +16330,7 @@ string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t) { - if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer && type.basetype != SPIRType::Struct) { // We are using a wrapped pointer type, and we should not emit any array declarations here. return ""; @@ -16124,6 +16605,61 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) require_extension_internal("GL_ARB_shader_atomic_counters"); } + const SPIRType *coop_type = &type; + while (is_pointer(*coop_type) || is_array(*coop_type)) + coop_type = &get(coop_type->parent_type); + + if (coop_type->op == spv::OpTypeCooperativeMatrixKHR) + { + require_extension_internal("GL_KHR_cooperative_matrix"); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Cooperative matrix only available in Vulkan."); + // GLSL doesn't support this as spec constant, which makes sense ... 
+ uint32_t use_type = get(coop_type->cooperative.use_id).scalar(); + + const char *use = nullptr; + switch (use_type) + { + case CooperativeMatrixUseMatrixAKHR: + use = "gl_MatrixUseA"; + break; + + case CooperativeMatrixUseMatrixBKHR: + use = "gl_MatrixUseB"; + break; + + case CooperativeMatrixUseMatrixAccumulatorKHR: + use = "gl_MatrixUseAccumulator"; + break; + + default: + SPIRV_CROSS_THROW("Invalid matrix use."); + } + + string scope_expr; + if (const auto *scope = maybe_get(coop_type->cooperative.scope_id)) + { + if (!scope->specialization) + { + require_extension_internal("GL_KHR_memory_scope_semantics"); + if (scope->scalar() == spv::ScopeSubgroup) + scope_expr = "gl_ScopeSubgroup"; + else if (scope->scalar() == spv::ScopeWorkgroup) + scope_expr = "gl_ScopeWorkgroup"; + else + SPIRV_CROSS_THROW("Invalid scope for cooperative matrix."); + } + } + + if (scope_expr.empty()) + scope_expr = to_expression(coop_type->cooperative.scope_id); + + return join("coopmat<", type_to_glsl(get(coop_type->parent_type)), ", ", + scope_expr, ", ", + to_expression(coop_type->cooperative.rows_id), ", ", + to_expression(coop_type->cooperative.columns_id), ", ", use, ">"); + } + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin { switch (type.basetype) @@ -16146,6 +16682,21 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) return "atomic_uint"; case SPIRType::Half: return "float16_t"; + case SPIRType::BFloat16: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics."); + require_extension_internal("GL_EXT_bfloat16"); + return "bfloat16_t"; + case SPIRType::FloatE4M3: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e4m3"); + return "floate4m3_t"; + case SPIRType::FloatE5M2: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e5m2"); + return "floate5m2_t"; case SPIRType::Float: return "float"; case SPIRType::Double: @@ -16178,6 +16729,21 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) return join("uvec", type.vecsize); case SPIRType::Half: return join("f16vec", type.vecsize); + case SPIRType::BFloat16: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics."); + require_extension_internal("GL_EXT_bfloat16"); + return join("bf16vec", type.vecsize); + case SPIRType::FloatE4M3: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e4m3"); + return join("fe4m3vec", type.vecsize); + case SPIRType::FloatE5M2: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics."); + require_extension_internal("GL_EXT_float_e5m2"); + return join("fe5m2vec", type.vecsize); case SPIRType::Float: return join("vec", type.vecsize); case SPIRType::Double: @@ -16353,6 +16919,11 @@ void CompilerGLSL::add_function_overload(const SPIRFunction &func) // but that will not change the signature in GLSL/HLSL, // so strip the pointer type before hashing. uint32_t type_id = get_pointee_type_id(arg.type); + + // Workaround glslang bug. It seems to only consider the base type when resolving overloads. 
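Put together, the scope, row/column and use operands from this hunk end up as a GL_KHR_cooperative_matrix type name; a small sketch of the assembled string (the helper and enum names are invented):

#include <cstdint>
#include <string>

enum class MatrixUse { A, B, Accumulator };

std::string coopmat_type_name(const std::string &component, const std::string &scope,
                              uint32_t rows, uint32_t cols, MatrixUse use) {
    const char *use_str = use == MatrixUse::A ? "gl_MatrixUseA" :
                          use == MatrixUse::B ? "gl_MatrixUseB" : "gl_MatrixUseAccumulator";
    return "coopmat<" + component + ", " + scope + ", " +
           std::to_string(rows) + ", " + std::to_string(cols) + ", " + use_str + ">";
}

// coopmat_type_name("float16_t", "gl_ScopeSubgroup", 16, 16, MatrixUse::A)
// -> "coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>"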
+ if (get(type_id).op == spv::OpTypeCooperativeMatrixKHR) + type_id = get(type_id).parent_type; + auto &type = get(type_id); if (!combined_image_samplers.empty()) @@ -16492,6 +17063,7 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) { // Recursively emit functions which are called. uint32_t id = ops[2]; + emit_function(get(id), ir.meta[ops[1]].decoration.decoration_flags); } } @@ -16530,6 +17102,12 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) // Comes from MSL which can push global variables as local variables in main function. add_local_variable_name(var.self); statement(variable_decl(var), ";"); + + // "Real" workgroup variables in compute shaders needs extra caretaking. + // They need to be initialized with an extra routine as they come in arbitrary form. + if (var.storage == StorageClassWorkgroup && var.initializer) + emit_workgroup_initialization(var); + var.deferred_declaration = false; } else if (var.storage == StorageClassPrivate) @@ -16636,6 +17214,10 @@ void CompilerGLSL::emit_fixup() } } +void CompilerGLSL::emit_workgroup_initialization(const SPIRVariable &) +{ +} + void CompilerGLSL::flush_phi(BlockID from, BlockID to) { auto &child = get(to); @@ -17869,6 +18451,14 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) case SPIRBlock::Unreachable: { + // If the entry point ends with unreachable and has a return value, insert a return + // statement to avoid potential compiler errors from non-void functions without a return value. + if (block.return_value) + { + statement("return ", to_unpacked_expression(block.return_value), ";"); + break; + } + // Avoid emitting false fallthrough, which can happen for // if (cond) break; else discard; inside a case label. // Discard is not always implementable as a terminator. 
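emit_workgroup_initialization() is deliberately a no-op in the GLSL backend, since GL_EXT_null_initializer already covers shared variables in the declaration itself; for backends that need the "more work after the decl" mentioned above, a hypothetical override might emit something along these lines (the emitted text below is illustrative and not taken from the patch):

#include <iostream>
#include <string>

// Stand-in for CompilerGLSL::statement(): prints one emitted source line.
void statement(const std::string &s) { std::cout << s << "\n"; }

// Hypothetical zero-initialization routine for one workgroup variable:
// one invocation writes the zero value, then everyone synchronizes.
void emit_workgroup_zero_init(const std::string &var_name, const std::string &zero_expr) {
    statement("if (gl_LocalInvocationIndex == 0u)");
    statement("    " + var_name + " = " + zero_expr + ";");
    statement("barrier();");
}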
diff --git a/thirdparty/spirv-cross/spirv_glsl.hpp b/thirdparty/spirv-cross/spirv_glsl.hpp index 8a00263234..03ff330ccf 100644 --- a/thirdparty/spirv-cross/spirv_glsl.hpp +++ b/thirdparty/spirv-cross/spirv_glsl.hpp @@ -297,6 +297,9 @@ public: float_formatter = formatter; } + // Returns the macro name corresponding to constant id + std::string constant_value_macro_name(uint32_t id) const; + protected: struct ShaderSubgroupSupportHelper { @@ -450,6 +453,7 @@ protected: virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const; virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); + virtual void emit_workgroup_initialization(const SPIRVariable &var); struct TextureFunctionBaseArguments { @@ -622,6 +626,8 @@ protected: const char *uint16_t_literal_suffix = "us"; const char *nonuniform_qualifier = "nonuniformEXT"; const char *boolean_mix_function = "mix"; + const char *printf_function = "debugPrintfEXT"; + std::string constant_null_initializer = ""; SPIRType::BaseType boolean_in_struct_remapped_type = SPIRType::Boolean; bool swizzle_is_function = false; bool shared_is_implied = false; @@ -629,6 +635,7 @@ protected: bool explicit_struct_type = false; bool use_initializer_list = false; bool use_typed_initializer_list = false; + bool requires_matching_array_initializer = false; bool can_declare_struct_inline = true; bool can_declare_arrays_inline = true; bool native_row_major_matrix = true; @@ -679,7 +686,6 @@ protected: const SmallVector &indices); void emit_block_chain(SPIRBlock &block); void emit_hoisted_temporaries(SmallVector> &temporaries); - std::string constant_value_macro_name(uint32_t id); int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const; void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); @@ -695,6 +701,7 @@ protected: void emit_variable_temporary_copies(const SPIRVariable &var); bool should_dereference(uint32_t id); + bool should_dereference_caller_param(uint32_t id); bool should_forward(uint32_t id) const; bool should_suppress_usage_tracking(uint32_t id) const; void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp); @@ -762,7 +769,7 @@ protected: spv::StorageClass get_expression_effective_storage_class(uint32_t ptr); virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base); - virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); + virtual bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type); virtual bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed); @@ -792,8 +799,9 @@ protected: std::string declare_temporary(uint32_t type, uint32_t id); void emit_uninitialized_temporary(uint32_t type, uint32_t id); SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id); - void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); + virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector &arglist); std::string to_non_uniform_aware_expression(uint32_t id); + std::string to_atomic_ptr_expression(uint32_t id); std::string to_expression(uint32_t id, bool register_expression_read = true); std::string 
to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type); std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type); @@ -1009,6 +1017,8 @@ protected: const Instruction *get_next_instruction_in_block(const Instruction &instr); static uint32_t mask_relevant_memory_semantics(uint32_t semantics); + std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); + std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row); diff --git a/thirdparty/spirv-cross/spirv_msl.cpp b/thirdparty/spirv-cross/spirv_msl.cpp index 642fcfa59a..1c4f1ed13a 100644 --- a/thirdparty/spirv-cross/spirv_msl.cpp +++ b/thirdparty/spirv-cross/spirv_msl.cpp @@ -272,16 +272,22 @@ void CompilerMSL::build_implicit_builtins() (active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) || active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) || active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance)); - bool need_local_invocation_index = (msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId)) || is_mesh_shader(); + bool need_local_invocation_index = + (msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId)) || is_mesh_shader() || + needs_workgroup_zero_init || needs_local_invocation_index; bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups); bool force_frag_depth_passthrough = get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input && msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment; + bool need_point_size = + msl_options.enable_point_size_builtin && msl_options.enable_point_size_default && + get_execution_model() == ExecutionModelVertex; if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params || need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params || needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation || - has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough || is_mesh_shader()) + has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || + force_frag_depth_passthrough || need_point_size || is_mesh_shader()) { bool has_frag_coord = false; bool has_sample_id = false; @@ -299,6 +305,7 @@ void CompilerMSL::build_implicit_builtins() bool has_local_invocation_index = false; bool has_workgroup_size = false; bool has_frag_depth = false; + bool has_point_size = false; uint32_t workgroup_id_type = 0; ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { @@ -306,6 +313,22 @@ void CompilerMSL::build_implicit_builtins() return; if (!interface_variable_exists_in_entry_point(var.self)) return; + + auto &type = this->get(var.basetype); + if (need_point_size && has_decoration(type.self, DecorationBlock)) + { + const auto member_count = 
static_cast(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInPointSize) + { + has_point_size = true; + active_output_builtins.set(BuiltInPointSize); + break; + } + } + } + if (!has_decoration(var.self, DecorationBuiltIn)) return; @@ -328,6 +351,12 @@ void CompilerMSL::build_implicit_builtins() } } + if (builtin == BuiltInPointSize) + { + has_point_size = true; + active_output_builtins.set(BuiltInPointSize); + } + if (builtin == BuiltInPrimitivePointIndicesEXT || builtin == BuiltInPrimitiveLineIndicesEXT || builtin == BuiltInPrimitiveTriangleIndicesEXT) @@ -481,7 +510,7 @@ void CompilerMSL::build_implicit_builtins() has_local_invocation_index = true; } - if (need_workgroup_size && builtin == BuiltInLocalInvocationId) + if (need_workgroup_size && builtin == BuiltInWorkgroupSize) { builtin_workgroup_size_id = var.self; mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self); @@ -903,25 +932,55 @@ void CompilerMSL::build_implicit_builtins() if (need_workgroup_size && !has_workgroup_size) { - uint32_t offset = ir.increase_bound_by(2); - uint32_t type_ptr_id = offset; - uint32_t var_id = offset + 1; + auto &execution = get_entry_point(); + // First, check if the workgroup size _constant_ were defined. + // If it were, we don't need to do--in fact, shouldn't do--anything. + builtin_workgroup_size_id = execution.workgroup_size.constant; + if (builtin_workgroup_size_id == 0) + { + uint32_t var_id = ir.increase_bound_by(1); - // Create gl_WorkgroupSize. - uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); - SPIRType uint_type_ptr = get(type_id); - uint_type_ptr.op = OpTypePointer; - uint_type_ptr.pointer = true; - uint_type_ptr.pointer_depth++; - uint_type_ptr.parent_type = type_id; - uint_type_ptr.storage = StorageClassInput; + // Create gl_WorkgroupSize. + uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3); + // If we have LocalSize or LocalSizeId, use those to define the workgroup size. 
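+ // For example (illustrative numbers): a compute shader declared with
+ // 'layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;' carries ExecutionModeLocalSize,
+ // so the second branch below packs the literals into three uint constants and a (64u, 1u, 1u)
+ // constant composite that then stands in for gl_WorkgroupSize; the LocalSizeId branch does the
+ // same with the referenced spec constants.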
+ if (execution.flags.get(ExecutionModeLocalSizeId)) + { + const SPIRConstant *init[] = { &get(execution.workgroup_size.id_x), + &get(execution.workgroup_size.id_y), + &get(execution.workgroup_size.id_z) }; + bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization; + set(var_id, type_id, init, 3, specialized); + execution.workgroup_size.constant = var_id; + } + else if (execution.flags.get(ExecutionModeLocalSize)) + { + uint32_t offset = ir.increase_bound_by(3); + const SPIRConstant *init[] = { + &set(offset, get_uint_type_id(), execution.workgroup_size.x, false), + &set(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false), + &set(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false) + }; + set(var_id, type_id, init, 3, false); + execution.workgroup_size.constant = var_id; + } + else + { + uint32_t type_ptr_id = ir.increase_bound_by(1); + SPIRType uint_type_ptr = get(type_id); + uint_type_ptr.op = OpTypePointer; + uint_type_ptr.pointer = true; + uint_type_ptr.pointer_depth++; + uint_type_ptr.parent_type = type_id; + uint_type_ptr.storage = StorageClassInput; - auto &ptr_type = set(type_ptr_id, uint_type_ptr); - ptr_type.self = type_id; - set(var_id, type_ptr_id, StorageClassInput); - set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); - builtin_workgroup_size_id = var_id; - mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); + auto &ptr_type = set(type_ptr_id, uint_type_ptr); + ptr_type.self = type_id; + set(var_id, type_ptr_id, StorageClassInput); + mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id); + } + set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize); + builtin_workgroup_size_id = var_id; + } } if (!has_frag_depth && force_frag_depth_passthrough) @@ -953,6 +1012,34 @@ void CompilerMSL::build_implicit_builtins() mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id); active_output_builtins.set(BuiltInFragDepth); } + + if (!has_point_size && need_point_size) + { + uint32_t offset = ir.increase_bound_by(3); + uint32_t type_id = offset; + uint32_t type_ptr_id = offset + 1; + uint32_t var_id = offset + 2; + + // Create gl_PointSize + SPIRType float_type { OpTypeFloat }; + float_type.basetype = SPIRType::Float; + float_type.width = 32; + float_type.vecsize = 1; + set(type_id, float_type); + + SPIRType float_type_ptr_in = float_type; + float_type_ptr_in.op = spv::OpTypePointer; + float_type_ptr_in.pointer = true; + float_type_ptr_in.pointer_depth++; + float_type_ptr_in.parent_type = type_id; + float_type_ptr_in.storage = StorageClassOutput; + + auto &ptr_in_type = set(type_ptr_id, float_type_ptr_in); + ptr_in_type.self = type_id; + set(var_id, type_ptr_id, StorageClassOutput); + set_decoration(var_id, DecorationBuiltIn, BuiltInPointSize); + mark_implicit_builtin(StorageClassOutput, BuiltInPointSize, var_id); + } } if (needs_swizzle_buffer_def) @@ -1003,6 +1090,7 @@ void CompilerMSL::build_implicit_builtins() // If we're returning a struct from a vertex-like entry point, we must return a position attribute. 
bool need_position = (get_execution_model() == ExecutionModelVertex || is_tese_shader()) && !capture_output_to_buffer && !get_is_rasterization_disabled() && + !msl_options.auto_disable_rasterization && !active_output_builtins.get(BuiltInPosition); if (need_position) @@ -1039,6 +1127,10 @@ void CompilerMSL::build_implicit_builtins() }); need_position = has_output && !active_output_builtins.get(BuiltInPosition); } + else if (!active_output_builtins.get(BuiltInPosition) && msl_options.auto_disable_rasterization) + { + is_rasterization_disabled = true; + } if (need_position) { @@ -1593,6 +1685,7 @@ string CompilerMSL::compile() backend.basic_int16_type = "short"; backend.basic_uint16_type = "ushort"; backend.boolean_mix_function = "select"; + backend.printf_function = "os_log_default.log"; backend.swizzle_is_function = false; backend.shared_is_implied = false; backend.use_initializer_list = true; @@ -1606,7 +1699,7 @@ string CompilerMSL::compile() backend.nonuniform_qualifier = ""; backend.support_small_type_sampling_result = true; backend.force_merged_mesh_block = false; - backend.force_gl_in_out_block = get_execution_model() == ExecutionModelMeshEXT; + backend.force_gl_in_out_block = false; backend.supports_empty_struct = true; backend.support_64bit_switch = true; backend.boolean_in_struct_remapped_type = SPIRType::Short; @@ -1644,6 +1737,7 @@ string CompilerMSL::compile() analyze_image_and_sampler_usage(); analyze_sampled_image_usage(); analyze_interlocked_resource_usage(); + analyze_workgroup_variables(); preprocess_op_codes(); build_implicit_builtins(); @@ -1777,7 +1871,7 @@ void CompilerMSL::preprocess_op_codes() if (preproc.uses_atomics) { add_header_line("#include "); - add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false); } // Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to @@ -1794,6 +1888,8 @@ void CompilerMSL::preprocess_op_codes() capture_output_to_buffer = true; } + if (preproc.needs_local_invocation_index) + needs_local_invocation_index = true; if (preproc.needs_subgroup_invocation_id) needs_subgroup_invocation_id = true; if (preproc.needs_subgroup_size) @@ -2147,6 +2243,36 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: break; } + case OpGroupNonUniformRotateKHR: + { + // Add the correct invocation ID for calculating clustered rotate case. + if (i.length > 5) + added_arg_ids.insert(static_cast(evaluate_constant_u32(ops[2])) == ScopeSubgroup + ? 
builtin_subgroup_invocation_id_id : builtin_local_invocation_index_id); + break; + } + + case OpGroupNonUniformFAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + if ((get_execution_model() != ExecutionModelFragment || msl_options.supports_msl_version(2, 2)) && + ops[3] == GroupOperationClusteredReduce) + added_arg_ids.insert(builtin_subgroup_invocation_id_id); + break; + case OpDemoteToHelperInvocation: if (needs_manual_helper_invocation_updates() && needs_helper_invocation) added_arg_ids.insert(builtin_helper_invocation_id); @@ -2317,7 +2443,14 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std:: v.storage = StorageClassWorkgroup; // Ensure the existing variable has a valid name and the new variable has all the same meta info - set_name(arg_id, ensure_valid_name(to_name(arg_id), "v")); + if (ir.meta[arg_id].decoration.builtin) + { + set_name(arg_id, builtin_to_glsl(bi_type, var.storage)); + } + else + { + set_name(arg_id, ensure_valid_name(to_name(arg_id), "v")); + } ir.meta[next_id] = ir.meta[arg_id]; } else if (is_builtin && has_decoration(p_type->self, DecorationBlock)) @@ -3182,41 +3315,62 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m"); set_member_name(ib_type.self, ib_mbr_idx, mbr_name); + // The SPIRV location of interface variable, used to obtain the initial + // MSL location (the location variable) and interface matching + uint32_t ir_location = UINT32_MAX; + bool has_member_loc_decor = has_member_decoration(var_type.self, mbr_idx, DecorationLocation); + bool has_var_loc_decor = has_decoration(var.self, DecorationLocation); + uint32_t orig_vecsize = UINT32_MAX; + + // If we haven't established a location base yet, do so here. + if (location == UINT32_MAX) + { + if (has_member_loc_decor) + ir_location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); + else if (has_var_loc_decor) + ir_location = get_accumulated_member_location(var, mbr_idx, meta.strip_array); + else if (is_builtin) + { + if (is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) + ir_location = inputs_by_builtin[builtin].location; + else if (capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) + ir_location = outputs_by_builtin[builtin].location; + } + } + // Once we determine the location of the first member within nested structures, // from a var of the topmost structure, the remaining flattened members of // the nested structures will have consecutive location values. At this point, // we've recursively tunnelled into structs, arrays, and matrices, and are // down to a single location for each member now. 
- if (!is_builtin && location != UINT32_MAX) + if (location == UINT32_MAX && ir_location != UINT32_MAX) + location = ir_location + i; + + if (storage == StorageClassInput && (has_member_loc_decor || has_var_loc_decor)) { - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, *usable_type, storage); - location++; + uint32_t component = 0; + uint32_t orig_mbr_type_id = usable_type->self; + + if (has_member_loc_decor) + component = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); + + var.basetype = ensure_correct_input_type(var.basetype, location, component, 0, meta.strip_array); + mbr_type_id = ensure_correct_input_type(usable_type->self, location, component, 0, meta.strip_array); + + // For members of the composite interface block, we only change the interface block type + // when interface matching happens. In the meantime, we store the original vector size + // and insert a swizzle when loading from metal interface block (see fixup below) + if (mbr_type_id != orig_mbr_type_id) + orig_vecsize = get(orig_mbr_type_id).vecsize; + + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; } - else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) + + if ((!is_builtin && location != UINT32_MAX) || (is_builtin && ir_location != UINT32_MAX)) { - location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, *usable_type, storage); - location++; - } - else if (has_decoration(var.self, DecorationLocation)) - { - location = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, *usable_type, storage); - location++; - } - else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) - { - location = inputs_by_builtin[builtin].location + i; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, *usable_type, storage); - location++; - } - else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) - { - location = outputs_by_builtin[builtin].location + i; set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); mark_location_as_used_by_shader(location, *usable_type, storage); location++; @@ -3256,6 +3410,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass case StorageClassInput: entry_func.fixup_hooks_in.push_back([=, &var]() { string lerp_call; + string swizzle; if (pull_model_inputs.count(var.self)) { if (is_centroid) @@ -3265,7 +3420,9 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass else lerp_call = ".interpolate_at_center()"; } - statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, ";"); + if (orig_vecsize != UINT32_MAX) + swizzle = vector_swizzle(orig_vecsize, 0); + statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, swizzle, ";"); }); break; @@ -3333,6 +3490,55 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor 
qual_var_name += ".interpolate_at_center()"; } + // The SPIRV location of interface variable, used to obtain the initial + // MSL location (the location variable) and interface matching + uint32_t ir_location = UINT32_MAX; + bool has_member_loc_decor = has_member_decoration(var_type.self, mbr_idx, DecorationLocation); + bool has_var_loc_decor = has_decoration(var.self, DecorationLocation); + uint32_t orig_vecsize = UINT32_MAX; + + if (has_member_loc_decor) + ir_location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); + else if (has_var_loc_decor) + ir_location = get_accumulated_member_location(var, mbr_idx, meta.strip_array); + else if (is_builtin) + { + if (is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) + ir_location = inputs_by_builtin[builtin].location; + else if (capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) + ir_location = outputs_by_builtin[builtin].location; + } + + // Once we determine the location of the first member within nested structures, + // from a var of the topmost structure, the remaining flattened members of + // the nested structures will have consecutive location values. At this point, + // we've recursively tunnelled into structs, arrays, and matrices, and are + // down to a single location for each member now. + if (location == UINT32_MAX && ir_location != UINT32_MAX) + location = ir_location; + + if (storage == StorageClassInput && (has_member_loc_decor || has_var_loc_decor)) + { + uint32_t component = 0; + uint32_t orig_mbr_type_id = mbr_type_id; + + if (has_member_loc_decor) + component = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); + + mbr_type_id = ensure_correct_input_type(mbr_type_id, location, component, 0, meta.strip_array); + + // For members of the composite interface block, we only change the interface block type + // when interface matching happens. In the meantime, we store the original vector size + // and insert a swizzle when loading from metal interface block (see fixup below) + if (mbr_type_id != orig_mbr_type_id) + orig_vecsize = get(orig_mbr_type_id).vecsize; + + if (storage == StorageClassInput && pull_model_inputs.count(var.self)) + ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); + else + ib_type.member_types[ib_mbr_idx] = mbr_type_id; + } + bool flatten_stage_out = false; string var_chain = var_chain_qual + "." + to_member_name(var_type, mbr_idx); if (is_builtin && !meta.strip_array) @@ -3348,7 +3554,11 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor { case StorageClassInput: entry_func.fixup_hooks_in.push_back([=]() { - statement(var_chain, " = ", qual_var_name, ";"); + string swizzle; + // Insert swizzle for widened interface block vector from interface matching + if (orig_vecsize != UINT32_MAX) + swizzle = vector_swizzle(orig_vecsize, 0); + statement(var_chain, " = ", qual_var_name, swizzle, ";"); }); break; @@ -3364,64 +3574,12 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor } } - // Once we determine the location of the first member within nested structures, - // from a var of the topmost structure, the remaining flattened members of - // the nested structures will have consecutive location values. At this point, - // we've recursively tunnelled into structs, arrays, and matrices, and are - // down to a single location for each member now. 
- if (!is_builtin && location != UINT32_MAX) + if ((!is_builtin && location != UINT32_MAX) || (is_builtin && ir_location != UINT32_MAX)) { set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); mark_location_as_used_by_shader(location, get(mbr_type_id), storage); location += type_to_location_count(get(mbr_type_id)); } - else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation)) - { - location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation); - uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent); - if (storage == StorageClassInput) - { - mbr_type_id = ensure_correct_input_type(mbr_type_id, location, comp, 0, meta.strip_array); - var_type.member_types[mbr_idx] = mbr_type_id; - if (storage == StorageClassInput && pull_model_inputs.count(var.self)) - ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); - else - ib_type.member_types[ib_mbr_idx] = mbr_type_id; - } - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, get(mbr_type_id), storage); - location += type_to_location_count(get(mbr_type_id)); - } - else if (has_decoration(var.self, DecorationLocation)) - { - location = get_accumulated_member_location(var, mbr_idx, meta.strip_array); - if (storage == StorageClassInput) - { - mbr_type_id = ensure_correct_input_type(mbr_type_id, location, 0, 0, meta.strip_array); - var_type.member_types[mbr_idx] = mbr_type_id; - if (storage == StorageClassInput && pull_model_inputs.count(var.self)) - ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective); - else - ib_type.member_types[ib_mbr_idx] = mbr_type_id; - } - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, get(mbr_type_id), storage); - location += type_to_location_count(get(mbr_type_id)); - } - else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin)) - { - location = inputs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, get(mbr_type_id), storage); - location += type_to_location_count(get(mbr_type_id)); - } - else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin)) - { - location = outputs_by_builtin[builtin].location; - set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location); - mark_location_as_used_by_shader(location, get(mbr_type_id), storage); - location += type_to_location_count(get(mbr_type_id)); - } // Copy the component location, if present. if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent)) @@ -3720,6 +3878,20 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st return; } + // Tesselation stages pass I/O via buffer content which may contain nested structs. + // Ensure the vector sizes of any nested struct members within these input variables match + // the vector sizes of the corresponding output variables from the previous pipeline stage. + // This adjustment is handled here instead of ensure_correct_input_type() in order to + // perform the necessary recursive processing. 
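+ // For example (illustrative values): a nested member declared as a 3-component vector at
+ // location 5, while the stage-input description supplied through the API records a 4-component
+ // vector at location 5 from the previous stage, gets widened to a 4-component vector so the
+ // raw buffer layout matches what that stage actually wrote.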
+ if (storage == StorageClassInput && var_type.basetype == SPIRType::Struct && + ((is_tesc_shader() && msl_options.multi_patch_workgroup) || + (is_tese_shader() && msl_options.raw_buffer_tese_input)) && + has_decoration(var.self, DecorationLocation)) + { + uint32_t locn = get_decoration(var.self, DecorationLocation); + ensure_struct_members_valid_vecsizes(get_variable_data_type(var), locn); + } + if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR)) SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL."); @@ -3915,6 +4087,43 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st } } +// Recursively iterate into the input struct type, and adjust the vecsize +// of any nested members, based on location info provided through the API. +// The location parameter is modified recursively. +void CompilerMSL::ensure_struct_members_valid_vecsizes(SPIRType &struct_type, uint32_t &location) +{ + assert(struct_type.basetype == SPIRType::Struct); + + auto mbr_cnt = struct_type.member_types.size(); + for (size_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++) + { + auto mbr_type_id = struct_type.member_types[mbr_idx]; + auto &mbr_type = get(mbr_type_id); + + if (mbr_type.basetype == SPIRType::Struct) + ensure_struct_members_valid_vecsizes(mbr_type, location); + else + { + auto p_va = inputs_by_location.find({location, 0}); + if (p_va != end(inputs_by_location) && p_va->second.vecsize > mbr_type.vecsize) + { + // Set a new member type into the struct type, and all its parent types. + auto new_mbr_type_id = build_extended_vector_type(mbr_type_id, p_va->second.vecsize); + for (auto *p_type = &struct_type; p_type; p_type = maybe_get(p_type->parent_type)) + p_type->member_types[mbr_idx] = new_mbr_type_id; + } + + // Calc location of next member + uint32_t loc_cnt = mbr_type.columns; + auto dim_cnt = mbr_type.array.size(); + for (uint32_t i = 0; i < dim_cnt; i++) + loc_cnt *= to_array_size_literal(mbr_type, i); + + location += loc_cnt; + } + } +} + // Fix up the mapping of variables to interface member indices, which is used to compile access chains // for per-vertex variables in a tessellation control shader. void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id) @@ -4219,8 +4428,9 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) // If the entry point should return the output struct, set the entry function // to return the output interface struct, otherwise to return nothing. // Watch out for the rare case where the terminator of the last entry point block is a - // Kill, instead of a Return. Based on SPIR-V's block-domination rules, we assume that - // any block that has a Kill will also have a terminating Return, except the last block. + // Kill or Unreachable, instead of a Return. Based on SPIR-V's block-domination rules, + // we assume that any block that has a Kill will also have a terminating Return, except + // the last block. // Indicate the output var requires early initialization. bool ep_should_return_output = !get_is_rasterization_disabled(); uint32_t rtn_id = ep_should_return_output ? 
ib_var_id : 0; @@ -4230,7 +4440,8 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch) for (auto &blk_id : entry_func.blocks) { auto &blk = get(blk_id); - if (blk.terminator == SPIRBlock::Return || (blk.terminator == SPIRBlock::Kill && blk_id == entry_func.blocks.back())) + auto last_blk_return = blk.terminator == SPIRBlock::Kill || blk.terminator == SPIRBlock::Unreachable; + if (blk.terminator == SPIRBlock::Return || (last_blk_return && blk_id == entry_func.blocks.back())) blk.return_value = rtn_id; } vars_needing_early_declaration.push_back(ib_var_id); @@ -5539,18 +5750,44 @@ void CompilerMSL::emit_header() { // This particular line can be overridden during compilation, so make it a flag and not a pragma line. if (suppress_missing_prototypes) - statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false); if (suppress_incompatible_pointer_types_discard_qualifiers) - statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false); + + // Disable warning about "sometimes unitialized" when zero-initializing simple threadgroup variables + if (suppress_sometimes_unitialized) + add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false); // Disable warning about missing braces for array template to make arrays a value type if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0) - statement("#pragma clang diagnostic ignored \"-Wmissing-braces\""); + add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false); + + // Floating point fast math compile declarations + if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2)) + { + uint32_t contract_mask = FPFastMathModeAllowContractMask; + uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask); + uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask); + + // FP math mode + uint32_t fp_flags = get_fp_fast_math_flags(true); + const char *math_mode = "safe"; + if ((fp_flags & fast_mask) == fast_mask) // Must have all flags + math_mode = "fast"; + else if ((fp_flags & relax_mask) == relax_mask) // Must have all flags + math_mode = "relaxed"; + + add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false); + + // FP contraction + const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ? 
"fast" : "off"; + add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false); + } for (auto &pragma : pragma_lines) statement(pragma); - if (!pragma_lines.empty() || suppress_missing_prototypes) + if (!pragma_lines.empty()) statement(""); statement("#include "); @@ -5570,18 +5807,23 @@ void CompilerMSL::emit_header() statement(""); } -void CompilerMSL::add_pragma_line(const string &line) +void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique) { - auto rslt = pragma_lines.insert(line); - if (rslt.second) - force_recompile(); + if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end()) + { + pragma_lines.push_back(line); + if (recompile_on_unique) + force_recompile(); + } } void CompilerMSL::add_typedef_line(const string &line) { - auto rslt = typedef_lines.insert(line); - if (rslt.second) + if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end()) + { + typedef_lines.push_back(line); force_recompile(); + } } // Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared @@ -5789,7 +6031,6 @@ void CompilerMSL::emit_custom_functions() if (!msl_options.supports_msl_version(2)) SPIRV_CROSS_THROW( "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0."); - spv_function_implementations.insert(SPVFuncImplForwardArgs); spv_function_implementations.insert(SPVFuncImplTextureSwizzle); if (msl_options.swizzle_texture_samples) spv_function_implementations.insert(SPVFuncImplGatherSwizzle); @@ -5803,16 +6044,22 @@ void CompilerMSL::emit_custom_functions() spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020); } - for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; - i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) - if (spv_function_implementations.count(static_cast(i))) - spv_function_implementations.insert(SPVFuncImplForwardArgs); + if (spv_function_implementations.count(SPVFuncImplGatherSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherConstOffsets)) + { + spv_function_implementations.insert(SPVFuncImplGatherReturn); + } + + if (spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherCompareConstOffsets)) + { + spv_function_implementations.insert(SPVFuncImplGatherCompareReturn); + } if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) || spv_function_implementations.count(SPVFuncImplGatherSwizzle) || spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle)) { - spv_function_implementations.insert(SPVFuncImplForwardArgs); spv_function_implementations.insert(SPVFuncImplGetSwizzle); } @@ -5820,6 +6067,17 @@ void CompilerMSL::emit_custom_functions() { switch (spv_func) { + case SPVFuncImplSMod: + statement("// Implementation of signed integer mod accurate to SPIR-V specification"); + statement("template"); + statement("inline Tx spvSMod(Tx x, Ty y)"); + begin_scope(); + statement("Tx remainder = x - y * (x / y);"); + statement("return select(Tx(remainder + y), remainder, remainder == 0 || (x >= 0) == (y >= 0));"); + end_scope(); + statement(""); + break; + case SPVFuncImplMod: statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()"); statement("template"); @@ -6282,23 +6540,6 @@ void CompilerMSL::emit_custom_functions() statement(""); break; - case SPVFuncImplForwardArgs: - statement("template struct spvRemoveReference { typedef T 
type; };"); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template struct spvRemoveReference { typedef T type; };"); - statement("template inline constexpr thread T&& spvForward(thread typename " - "spvRemoveReference::type& x)"); - begin_scope(); - statement("return static_cast(x);"); - end_scope(); - statement("template inline constexpr thread T&& spvForward(thread typename " - "spvRemoveReference::type&& x)"); - begin_scope(); - statement("return static_cast(x);"); - end_scope(); - statement(""); - break; - case SPVFuncImplGetSwizzle: statement("enum class spvSwizzle : uint"); begin_scope(); @@ -6356,11 +6597,22 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplGatherReturn: + statement("template"); + statement("using spvGatherReturn = decltype(declval().gather(declval(), declval()...));"); + statement(""); + break; + + case SPVFuncImplGatherCompareReturn: + statement("template"); + statement("using spvGatherCompareReturn = decltype(declval().gather_compare(declval(), declval()...));"); + statement(""); + break; + case SPVFuncImplGatherSwizzle: statement("// Wrapper function that swizzles texture gathers."); - statement("template class Tex, " - "typename... Ts>"); - statement("inline vec spvGatherSwizzle(const thread Tex& t, sampler s, " + statement("template"); + statement("inline spvGatherReturn spvGatherSwizzle(const thread Tex& t, sampler s, " "uint sw, component c, Ts... params) METAL_CONST_ARG(c)"); begin_scope(); statement("if (sw)"); @@ -6370,17 +6622,17 @@ void CompilerMSL::emit_custom_functions() statement("case spvSwizzle::none:"); statement(" break;"); statement("case spvSwizzle::zero:"); - statement(" return vec(0, 0, 0, 0);"); + statement(" return spvGatherReturn(0, 0, 0, 0);"); statement("case spvSwizzle::one:"); - statement(" return vec(1, 1, 1, 1);"); + statement(" return spvGatherReturn(1, 1, 1, 1);"); statement("case spvSwizzle::red:"); - statement(" return t.gather(s, spvForward(params)..., component::x);"); + statement(" return t.gather(s, params..., component::x);"); statement("case spvSwizzle::green:"); - statement(" return t.gather(s, spvForward(params)..., component::y);"); + statement(" return t.gather(s, params..., component::y);"); statement("case spvSwizzle::blue:"); - statement(" return t.gather(s, spvForward(params)..., component::z);"); + statement(" return t.gather(s, params..., component::z);"); statement("case spvSwizzle::alpha:"); - statement(" return t.gather(s, spvForward(params)..., component::w);"); + statement(" return t.gather(s, params..., component::w);"); end_scope(); end_scope(); // texture::gather insists on its component parameter being a constant @@ -6388,13 +6640,13 @@ void CompilerMSL::emit_custom_functions() statement("switch (c)"); begin_scope(); statement("case component::x:"); - statement(" return t.gather(s, spvForward(params)..., component::x);"); + statement(" return t.gather(s, params..., component::x);"); statement("case component::y:"); - statement(" return t.gather(s, spvForward(params)..., component::y);"); + statement(" return t.gather(s, params..., component::y);"); statement("case component::z:"); - statement(" return t.gather(s, spvForward(params)..., component::z);"); + statement(" return t.gather(s, params..., component::z);"); statement("case component::w:"); - statement(" return t.gather(s, spvForward(params)..., component::w);"); + statement(" return t.gather(s, params..., component::w);"); end_scope(); end_scope(); statement(""); @@ -6402,10 
+6654,8 @@ void CompilerMSL::emit_custom_functions() case SPVFuncImplGatherCompareSwizzle: statement("// Wrapper function that swizzles depth texture gathers."); - statement("template class Tex, " - "typename... Ts>"); - statement("inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler " - "s, uint sw, Ts... params) "); + statement("template"); + statement("inline spvGatherCompareReturn spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params)"); begin_scope(); statement("if (sw)"); begin_scope(); @@ -6418,12 +6668,12 @@ void CompilerMSL::emit_custom_functions() statement("case spvSwizzle::green:"); statement("case spvSwizzle::blue:"); statement("case spvSwizzle::alpha:"); - statement(" return vec(0, 0, 0, 0);"); + statement(" return spvGatherCompareReturn(0, 0, 0, 0);"); statement("case spvSwizzle::one:"); - statement(" return vec(1, 1, 1, 1);"); + statement(" return spvGatherCompareReturn(1, 1, 1, 1);"); end_scope(); end_scope(); - statement("return t.gather_compare(s, spvForward(params)...);"); + statement("return t.gather_compare(s, params...);"); end_scope(); statement(""); break; @@ -6433,33 +6683,32 @@ void CompilerMSL::emit_custom_functions() for (uint32_t i = 0; i < texture_addr_space_count; i++) { statement("// Wrapper function that processes a ", texture_addr_spaces[i], " texture gather with a constant offset array."); - statement("template class Tex, " - "typename Toff, typename... Tp>"); - statement("inline vec spvGatherConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, " + statement("template"); + statement("inline spvGatherReturn spvGatherConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, " "Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)"); begin_scope(); - statement("vec rslts[4];"); + statement("spvGatherReturn rslts[4];"); statement("for (uint i = 0; i < 4; i++)"); begin_scope(); statement("switch (c)"); begin_scope(); // Work around texture::gather() requiring its component parameter to be a constant expression statement("case component::x:"); - statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::x);"); + statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::x);"); statement(" break;"); statement("case component::y:"); - statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::y);"); + statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::y);"); statement(" break;"); statement("case component::z:"); - statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::z);"); + statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::z);"); statement(" break;"); statement("case component::w:"); - statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::w);"); + statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::w);"); statement(" break;"); end_scope(); end_scope(); // Pull all values from the i0j0 component of each gather footprint - statement("return vec(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);"); + statement("return spvGatherReturn(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);"); end_scope(); statement(""); } @@ -6470,18 +6719,17 @@ void CompilerMSL::emit_custom_functions() for (uint32_t i = 0; i < texture_addr_space_count; i++) { statement("// Wrapper function that processes a ", texture_addr_spaces[i], " texture gather with a constant offset array."); - statement("template class Tex, " - "typename Toff, typename... 
Tp>"); - statement("inline vec spvGatherCompareConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, " + statement("template"); + statement("inline spvGatherCompareReturn spvGatherCompareConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, " "Toff coffsets, Tp... params)"); begin_scope(); - statement("vec rslts[4];"); + statement("spvGatherCompareReturn rslts[4];"); statement("for (uint i = 0; i < 4; i++)"); begin_scope(); - statement(" rslts[i] = t.gather_compare(s, spvForward(params)..., coffsets[i]);"); + statement(" rslts[i] = t.gather_compare(s, params..., coffsets[i]);"); end_scope(); // Pull all values from the i0j0 component of each gather footprint - statement("return vec(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);"); + statement("return spvGatherCompareReturn(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);"); end_scope(); statement(""); } @@ -6826,6 +7074,135 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplSubgroupRotate: + statement("template"); + statement("inline T spvSubgroupRotate(T value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return quad_shuffle_rotate_down(value, delta);"); + else + statement("return simd_shuffle_rotate_down(value, delta);"); + end_scope(); + statement(""); + statement("template<>"); + statement("inline bool spvSubgroupRotate(bool value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return !!quad_shuffle_rotate_down((ushort)value, delta);"); + else + statement("return !!simd_shuffle_rotate_down((ushort)value, delta);"); + end_scope(); + statement(""); + statement("template"); + statement("inline vec spvSubgroupRotate(vec value, ushort delta)"); + begin_scope(); + if (msl_options.use_quadgroup_operation()) + statement("return (vec)quad_shuffle_rotate_down((vec)value, delta);"); + else + statement("return (vec)simd_shuffle_rotate_down((vec)value, delta);"); + end_scope(); + statement(""); + break; + + // C++ disallows partial specializations of function templates, + // hence the use of a struct. 
+ // clang-format off +#define FUNC_SUBGROUP_CLUSTERED(spv, msl, combine, op, ident) \ + case SPVFuncImplSubgroupClustered##spv: \ + statement("template"); \ + statement("struct spvClustered" #spv "Detail;"); \ + statement(""); \ + statement("// Base cases"); \ + statement("template<>"); \ + statement("struct spvClustered" #spv "Detail<1, 0>"); \ + begin_scope(); \ + statement("template"); \ + statement("static T op(T value, uint)"); \ + begin_scope(); \ + statement("return value;"); \ + end_scope(); \ + end_scope_decl(); \ + statement(""); \ + statement("template"); \ + statement("struct spvClustered" #spv "Detail<1, offset>"); \ + begin_scope(); \ + statement("template"); \ + statement("static T op(T value, uint lid)"); \ + begin_scope(); \ + statement("// If the target lane is inactive, then return identity."); \ + if (msl_options.use_quadgroup_operation()) \ + statement("if (!extract_bits((quad_vote::vote_t)quad_active_threads_mask(), (lid ^ offset), 1))"); \ + else \ + statement("if (!extract_bits(as_type((simd_vote::vote_t)simd_active_threads_mask())[(lid ^ offset) / 32], (lid ^ offset) % 32, 1))"); \ + statement(" return " #ident ";"); \ + if (msl_options.use_quadgroup_operation()) \ + statement("return quad_shuffle_xor(value, offset);"); \ + else \ + statement("return simd_shuffle_xor(value, offset);"); \ + end_scope(); \ + end_scope_decl(); \ + statement(""); \ + statement("template<>"); \ + statement("struct spvClustered" #spv "Detail<4, 0>"); \ + begin_scope(); \ + statement("template"); \ + statement("static T op(T value, uint)"); \ + begin_scope(); \ + statement("return quad_" #msl "(value);"); \ + end_scope(); \ + end_scope_decl(); \ + statement(""); \ + statement("template"); \ + statement("struct spvClustered" #spv "Detail<4, offset>"); \ + begin_scope(); \ + statement("template"); \ + statement("static T op(T value, uint lid)"); \ + begin_scope(); \ + statement("// Here, we care if any of the lanes in the quad are active."); \ + statement("uint quad_mask = extract_bits(as_type((simd_vote::vote_t)simd_active_threads_mask())[(lid ^ offset) / 32], ((lid ^ offset) % 32) & ~3, 4);"); \ + statement("if (!quad_mask)"); \ + statement(" return " #ident ";"); \ + statement("// But we need to make sure we shuffle from an active lane."); \ + if (msl_options.use_quadgroup_operation()) \ + SPIRV_CROSS_THROW("Subgroup size with quadgroup operation cannot exceed 4."); \ + else \ + statement("return simd_shuffle(quad_" #msl "(value), ((lid ^ offset) & ~3) | ctz(quad_mask));"); \ + end_scope(); \ + end_scope_decl(); \ + statement(""); \ + statement("// General case"); \ + statement("template"); \ + statement("struct spvClustered" #spv "Detail"); \ + begin_scope(); \ + statement("template"); \ + statement("static T op(T value, uint lid)"); \ + begin_scope(); \ + statement("return " combine(msl, op, "spvClustered" #spv "Detail::op(value, lid)", "spvClustered" #spv "Detail::op(value, lid)") ";"); \ + end_scope(); \ + end_scope_decl(); \ + statement(""); \ + statement("template"); \ + statement("T spvClustered_" #msl "(T value, uint lid)"); \ + begin_scope(); \ + statement("return spvClustered" #spv "Detail::op(value, lid);"); \ + end_scope(); \ + statement(""); \ + break +#define BINOP(msl, op, l, r) l " " #op " " r +#define BINFUNC(msl, op, l, r) #msl "(" l ", " r ")" + + FUNC_SUBGROUP_CLUSTERED(Add, sum, BINOP, +, 0); + FUNC_SUBGROUP_CLUSTERED(Mul, product, BINOP, *, 1); + FUNC_SUBGROUP_CLUSTERED(Min, min, BINFUNC, , numeric_limits::max()); + FUNC_SUBGROUP_CLUSTERED(Max, max, BINFUNC, , 
numeric_limits::min()); + FUNC_SUBGROUP_CLUSTERED(And, and, BINOP, &, ~T(0)); + FUNC_SUBGROUP_CLUSTERED(Or, or, BINOP, |, 0); + FUNC_SUBGROUP_CLUSTERED(Xor, xor, BINOP, ^, 0); + // clang-format on +#undef FUNC_SUBGROUP_CLUSTERED +#undef BINOP +#undef BINFUNC + case SPVFuncImplQuadBroadcast: statement("template"); statement("inline T spvQuadBroadcast(T value, uint lane)"); @@ -6925,8 +7302,8 @@ void CompilerMSL::emit_custom_functions() "samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); - statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); + statement("ycbcr.br = plane1.sample(samp, coord, options...).rg;"); statement("return ycbcr;"); end_scope(); statement(""); @@ -6938,9 +7315,9 @@ void CompilerMSL::emit_custom_functions() "texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); - statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); - statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, options...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, options...).r;"); statement("return ycbcr;"); end_scope(); statement(""); @@ -6952,15 +7329,15 @@ void CompilerMSL::emit_custom_functions() "plane1, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); begin_scope(); - statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).rg);"); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), 0.5).rg);"); end_scope(); statement("else"); begin_scope(); - statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + statement("ycbcr.br = plane1.sample(samp, coord, options...).rg;"); end_scope(); statement("return ycbcr;"); end_scope(); @@ -6973,18 +7350,18 @@ void CompilerMSL::emit_custom_functions() "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); begin_scope(); - statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); - statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, options...), " + "plane2.sample(samp, coord, options..., int2(1, 0)), 0.5).r);"); end_scope(); statement("else"); begin_scope(); - statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); - statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, options...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, options...).r;"); end_scope(); statement("return ycbcr;"); end_scope(); @@ -6997,10 +7374,10 @@ void CompilerMSL::emit_custom_functions() "plane1, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); - statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).rg);"); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., offs), 0.25).rg);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7012,12 +7389,12 @@ void CompilerMSL::emit_custom_functions() "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); - statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); - statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " - "plane2.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., offs), 0.25).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, options...), " + "plane2.sample(samp, coord, options..., offs), 0.25).r);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7029,12 +7406,12 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); - statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).rg);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7046,16 +7423,16 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); - statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); - statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, options...), " + "plane2.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, options..., int2(0, 1)), " + "plane2.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7067,13 +7444,13 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " "0)) * 0.5);"); - statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).rg);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7085,17 +7462,17 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " "0)) * 0.5);"); - statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); - statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, options...), " + "plane2.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, options..., int2(0, 1)), " + "plane2.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7107,13 +7484,13 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " "0.5)) * 0.5);"); - statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).rg);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7125,17 +7502,17 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " "0.5)) * 0.5);"); - statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); - statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, options...), " + "plane2.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, options..., int2(0, 1)), " + "plane2.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7147,13 +7524,13 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " "0.5)) * 0.5);"); - statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).rg);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7165,17 +7542,17 @@ void CompilerMSL::emit_custom_functions() "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); begin_scope(); statement("vec ycbcr = vec(0, 0, 0, 1);"); - statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.g = plane0.sample(samp, coord, options...).r;"); statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " "0.5)) * 0.5);"); - statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); - statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " - "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " - "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, options...), " + "plane1.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, options..., int2(0, 1)), " + "plane1.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, options...), " + "plane2.sample(samp, coord, options..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, options..., int2(0, 1)), " + "plane2.sample(samp, coord, options..., int2(1, 1)), ab.x), ab.y).r);"); statement("return ycbcr;"); end_scope(); statement(""); @@ -7457,10 +7834,8 @@ void CompilerMSL::emit_custom_functions() statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)"); begin_scope(); statement("if (!is_null_texture(plane2))"); - statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,"); - statement(" spvForward(options)...);"); - statement( - "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward(options)...);"); + statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord, options...);"); + statement("return spvChromaReconstructNearest(plane0, plane1, samp, coord, options...);"); end_scope(); // if (resolution == 422 || chroma_filter == nearest) statement("switch (ycbcr_samp.get_resolution())"); begin_scope(); @@ -7473,18 +7848,18 @@ void 
CompilerMSL::emit_custom_functions() statement(" if (!is_null_texture(plane2))"); statement(" return spvChromaReconstructLinear422CositedEven("); statement(" plane0, plane1, plane2, samp,"); - statement(" coord, spvForward(options)...);"); + statement(" coord, options...);"); statement(" return spvChromaReconstructLinear422CositedEven("); statement(" plane0, plane1, samp, coord,"); - statement(" spvForward(options)...);"); + statement(" options...);"); statement("case spvXChromaLocation::midpoint:"); statement(" if (!is_null_texture(plane2))"); statement(" return spvChromaReconstructLinear422Midpoint("); statement(" plane0, plane1, plane2, samp,"); - statement(" coord, spvForward(options)...);"); + statement(" coord, options...);"); statement(" return spvChromaReconstructLinear422Midpoint("); statement(" plane0, plane1, samp, coord,"); - statement(" spvForward(options)...);"); + statement(" options...);"); end_scope(); // switch (x_chroma_offset) end_scope(); // case 422: statement("case spvFormatResolution::_420:"); @@ -7499,18 +7874,18 @@ void CompilerMSL::emit_custom_functions() statement(" if (!is_null_texture(plane2))"); statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); statement(" plane0, plane1, plane2, samp,"); - statement(" coord, spvForward(options)...);"); + statement(" coord, options...);"); statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); statement(" plane0, plane1, samp, coord,"); - statement(" spvForward(options)...);"); + statement(" options...);"); statement("case spvYChromaLocation::midpoint:"); statement(" if (!is_null_texture(plane2))"); statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); statement(" plane0, plane1, plane2, samp,"); - statement(" coord, spvForward(options)...);"); + statement(" coord, options...);"); statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); statement(" plane0, plane1, samp, coord,"); - statement(" spvForward(options)...);"); + statement(" options...);"); end_scope(); // switch (y_chroma_offset) end_scope(); // case x::cosited_even: statement("case spvXChromaLocation::midpoint:"); @@ -7521,31 +7896,30 @@ void CompilerMSL::emit_custom_functions() statement(" if (!is_null_texture(plane2))"); statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); statement(" plane0, plane1, plane2, samp,"); - statement(" coord, spvForward(options)...);"); + statement(" coord, options...);"); statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); statement(" plane0, plane1, samp, coord,"); - statement(" spvForward(options)...);"); + statement(" options...);"); statement("case spvYChromaLocation::midpoint:"); statement(" if (!is_null_texture(plane2))"); statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); statement(" plane0, plane1, plane2, samp,"); - statement(" coord, spvForward(options)...);"); + statement(" coord, options...);"); statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); statement(" plane0, plane1, samp, coord,"); - statement(" spvForward(options)...);"); + statement(" options...);"); end_scope(); // switch (y_chroma_offset) end_scope(); // case x::midpoint end_scope(); // switch (x_chroma_offset) end_scope(); // case 420: end_scope(); // switch (resolution) end_scope(); // if (multiplanar) - statement("return plane0.sample(samp, coord, spvForward(options)...);"); + statement("return plane0.sample(samp, coord, options...);"); end_scope(); // do_sample() statement("template "); 
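// Why spvForward can be dropped throughout these generated helpers: the variadic
// LodOptions arguments (bias(), level(), gradient2d(), min_lod_clamp(), ...) are small
// value types, so plain pack expansion with `options...` forwards them just as well as
// the old perfect-forwarding shim did. A minimal sketch of the pattern in generated
// MSL, using a hypothetical helper name for illustration only:

template <typename T, typename... LodOptions>
inline vec<T, 4> sample_plane(texture2d<T> plane, sampler samp, float2 coord,
                              LodOptions... options)
{
    // Re-expand the pack by value; no forwarding helper is required.
    return plane.sample(samp, coord, options...);
}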
statement("vec sample(float2 coord, LodOptions... options) const thread"); begin_scope(); - statement( - "vec s = spvTextureSwizzle(do_sample(coord, spvForward(options)...), swizzle);"); + statement("vec s = spvTextureSwizzle(do_sample(coord, options...), swizzle);"); statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)"); statement(" return s;"); statement(""); @@ -7784,6 +8158,26 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplAssume: + statement_no_indent("#if defined(__has_builtin)"); + statement_no_indent("#if !defined(SPV_ASSUME) && __has_builtin(__builtin_assume)"); + statement_no_indent("#define SPV_ASSUME(x) __builtin_assume(x);"); + statement_no_indent("#endif"); + statement_no_indent("#if !defined(SPV_EXPECT) && __has_builtin(__builtin_expect)"); + statement_no_indent("#define SPV_EXPECT(x, y) __builtin_expect(x, y);"); + statement_no_indent("#endif"); + statement_no_indent("#endif"); + + statement_no_indent("#ifndef SPV_ASSUME"); + statement_no_indent("#define SPV_ASSUME(x)"); + statement_no_indent("#endif"); + + statement_no_indent("#ifndef SPV_EXPECT"); + statement_no_indent("#define SPV_EXPECT(x, y) x"); + statement_no_indent("#endif"); + + break; + default: break; } @@ -7909,8 +8303,16 @@ void CompilerMSL::emit_specialization_constants_and_structs() { SpecializationConstant wg_x, wg_y, wg_z; ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - bool emitted = false; + if (workgroup_size_id == 0 && is_mesh_shader()) + { + auto &execution = get_entry_point(); + statement("constant uint3 ", builtin_to_glsl(BuiltInWorkgroupSize, StorageClassWorkgroup), + " [[maybe_unused]] = ", "uint3(", execution.workgroup_size.x, ", ", execution.workgroup_size.y, ", ", + execution.workgroup_size.z, ");"); + statement(""); + } + bool emitted = false; unordered_set declared_structs; unordered_set aligned_structs; @@ -8015,14 +8417,18 @@ void CompilerMSL::emit_specialization_constants_and_structs() else if (has_decoration(c.self, DecorationSpecId)) { // Fallback to macro overrides. + uint32_t constant_id = get_decoration(c.self, DecorationSpecId); c.specialization_constant_macro_name = - constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + constant_value_macro_name(constant_id); statement("#ifndef ", c.specialization_constant_macro_name); statement("#define ", c.specialization_constant_macro_name, " ", constant_expression(c)); statement("#endif"); statement("constant ", sc_type_name, " ", sc_name, " = ", c.specialization_constant_macro_name, ";"); + + // Record the usage of macro + constant_macro_ids.insert(constant_id); } else { @@ -8897,7 +9303,7 @@ void CompilerMSL::fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t // If the physical type of a physical buffer pointer has been changed // to a ulong or ulongn vector, add a cast back to the pointer type. 
-void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) +bool CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) { auto *p_physical_type = maybe_get(physical_type); if (p_physical_type && @@ -8908,7 +9314,10 @@ void CompilerMSL::check_physical_type_cast(std::string &expr, const SPIRType *ty expr += ".x"; expr = join("((", type_to_glsl(*type), ")", expr, ")"); + return true; } + + return false; } // Override for MSL-specific syntax instructions @@ -9124,6 +9533,10 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpSMod: + MSL_BFOP(spvSMod); + break; + case OpFRem: MSL_BFOP(fmod); break; @@ -9594,6 +10007,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) if (needs_frag_discard_checks() && (type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform)) end_scope(); + if (has_decoration(ops[0], DecorationBuiltIn) && get_decoration(ops[0], DecorationBuiltIn) == BuiltInPointSize) + writes_to_point_size = true; + break; } @@ -9604,9 +10020,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) case OpControlBarrier: // In GLSL a memory barrier is often followed by a control barrier. - // But in MSL, memory barriers are also control barriers, so don't + // But in MSL, memory barriers are also control barriers (before MSL 3.2), so don't // emit a simple control barrier if a memory barrier has just been emitted. - if (previous_instruction_opcode != OpMemoryBarrier) + if (previous_instruction_opcode != OpMemoryBarrier || msl_options.supports_msl_version(3, 2)) emit_barrier(ops[0], ops[1], ops[2]); break; @@ -9971,11 +10387,11 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) auto &type = get(ops[0]); auto &input_type = expression_type(ops[2]); - if (opcode != OpBitcast || type.pointer || input_type.pointer) + if (opcode != OpBitcast || is_pointer(type) || is_pointer(input_type)) { string op; - if (type.vecsize == 1 && input_type.vecsize == 1) + if ((type.vecsize == 1 || is_pointer(type)) && (input_type.vecsize == 1 || is_pointer(input_type))) op = join("reinterpret_cast<", type_to_glsl(type), ">(", to_unpacked_expression(ops[2]), ")"); else if (input_type.vecsize == 2) op = join("reinterpret_cast<", type_to_glsl(type), ">(as_type(", to_unpacked_expression(ops[2]), "))"); @@ -10125,6 +10541,27 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpAssumeTrueKHR: + { + auto condition = ops[0]; + statement(join("SPV_ASSUME(", to_unpacked_expression(condition), ")")); + break; + } + + case OpExpectKHR: + { + auto result_type = ops[0]; + auto ret = ops[1]; + auto value = ops[2]; + auto exp_value = ops[3]; + + auto exp = join("SPV_EXPECT(", to_unpacked_expression(value), ", ", to_unpacked_expression(exp_value), ")"); + emit_op(result_type, ret, exp, should_forward(value), should_forward(exp_value)); + inherit_expression_dependencies(ret, value); + inherit_expression_dependencies(ret, exp_value); + break; + } + default: CompilerGLSL::emit_instruction(instruction); break; @@ -10184,10 +10621,20 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin return; string bar_stmt; - if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2)) - bar_stmt = exe_scope < ScopeSubgroup ? 
"threadgroup_barrier" : "simdgroup_barrier"; + + if (!id_exe_scope && msl_options.supports_msl_version(3, 2)) + { + // Just took 10 years to get a proper barrier, but hey! + bar_stmt = "atomic_thread_fence"; + } else - bar_stmt = "threadgroup_barrier"; + { + if ((msl_options.is_ios() && msl_options.supports_msl_version(1, 2)) || msl_options.supports_msl_version(2)) + bar_stmt = exe_scope < ScopeSubgroup ? "threadgroup_barrier" : "simdgroup_barrier"; + else + bar_stmt = "threadgroup_barrier"; + } + bar_stmt += "("; uint32_t mem_sem = id_mem_sem ? evaluate_constant_u32(id_mem_sem) : uint32_t(MemorySemanticsMaskNone); @@ -10195,7 +10642,8 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin // Use the | operator to combine flags if we can. if (msl_options.supports_msl_version(1, 2)) { - string mem_flags = ""; + string mem_flags; + // For tesc shaders, this also affects objects in the Output storage class. // Since in Metal, these are placed in a device buffer, we have to sync device memory here. if (is_tesc_shader() || @@ -10236,6 +10684,55 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin bar_stmt += "mem_flags::mem_none"; } + if (!id_exe_scope && msl_options.supports_msl_version(3, 2)) + { + // If there's no device-related memory in the barrier, demote to workgroup scope. + // glslang seems to emit device scope even for memoryBarrierShared(). + if (mem_scope == ScopeDevice && + (mem_sem & (MemorySemanticsUniformMemoryMask | + MemorySemanticsImageMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask)) == 0) + { + mem_scope = ScopeWorkgroup; + } + + // MSL 3.2 only supports seq_cst or relaxed. + if (mem_sem & (MemorySemanticsAcquireReleaseMask | + MemorySemanticsAcquireMask | + MemorySemanticsReleaseMask | + MemorySemanticsSequentiallyConsistentMask)) + { + bar_stmt += ", memory_order_seq_cst"; + } + else + { + bar_stmt += ", memory_order_relaxed"; + } + + switch (mem_scope) + { + case ScopeDevice: + bar_stmt += ", thread_scope_device"; + break; + + case ScopeWorkgroup: + bar_stmt += ", thread_scope_threadgroup"; + break; + + case ScopeSubgroup: + bar_stmt += ", thread_scope_subgroup"; + break; + + case ScopeInvocation: + bar_stmt += ", thread_scope_thread"; + break; + + default: + // The default argument is device, which is conservative. + break; + } + } + bar_stmt += ");"; statement(bar_stmt); @@ -11337,7 +11834,6 @@ string CompilerMSL::to_function_name(const TextureFunctionNameArguments &args) { bool is_compare = comparison_ids.count(img); add_spv_func_and_recompile(is_compare ? SPVFuncImplGatherCompareConstOffsets : SPVFuncImplGatherConstOffsets); - add_spv_func_and_recompile(SPVFuncImplForwardArgs); return is_compare ? "spvGatherCompareConstOffsets" : "spvGatherConstOffsets"; } @@ -12310,7 +12806,7 @@ string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_ } // Dereference pointer variables where needed. // FIXME: This dereference is actually backwards. We should really just support passing pointer variables between functions. 
- else if (should_dereference(id)) + else if (should_dereference_caller_param(id)) arg_str += dereference_expression(type, CompilerGLSL::to_func_call_arg(arg, id)); else arg_str += CompilerGLSL::to_func_call_arg(arg, id); @@ -12564,6 +13060,9 @@ void CompilerMSL::emit_fixup() { if (is_vertex_like_shader() && stage_out_var_id && !qual_pos_var_name.empty() && !capture_output_to_buffer) { + if (msl_options.enable_point_size_default && !writes_to_point_size) + statement(builtin_to_glsl(BuiltInPointSize, StorageClassOutput), " = ", format_float(msl_options.default_point_size), ";"); + if (options.vertex.fixup_clipspace) statement(qual_pos_var_name, ".z = (", qual_pos_var_name, ".z + ", qual_pos_var_name, ".w) * 0.5; // Adjust clip-space for Metal"); @@ -13398,12 +13897,23 @@ bool CompilerMSL::uses_explicit_early_fragment_test() string CompilerMSL::get_argument_address_space(const SPIRVariable &argument) { const auto &type = get(argument.basetype); + // BDA is always passed around by value. There is no storage class for the argument itself. + if (is_physical_pointer(type)) + return ""; return get_type_address_space(type, argument.self, true); } -bool CompilerMSL::decoration_flags_signal_volatile(const Bitset &flags) +bool CompilerMSL::decoration_flags_signal_volatile(const Bitset &flags) const { - return flags.get(DecorationVolatile) || flags.get(DecorationCoherent); + // Using volatile for coherent pre-3.2 is definitely not correct, but it's something. + // MSL 3.2 adds actual coherent qualifiers. + return flags.get(DecorationVolatile) || + (flags.get(DecorationCoherent) && !msl_options.supports_msl_version(3, 2)); +} + +bool CompilerMSL::decoration_flags_signal_coherent(const Bitset &flags) const +{ + return flags.get(DecorationCoherent) && msl_options.supports_msl_version(3, 2); } string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bool argument) @@ -13415,8 +13925,17 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock))) flags = get_buffer_block_flags(id); else + { flags = get_decoration_bitset(id); + if (type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || + has_decoration(type.self, DecorationBufferBlock))) + { + flags.merge_or(ir.get_buffer_block_type_flags(type)); + } + } + const char *addr_space = nullptr; switch (type.storage) { @@ -13425,7 +13944,6 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo break; case StorageClassStorageBuffer: - case StorageClassPhysicalStorageBuffer: { // For arguments from variable pointers, we use the write count deduction, so // we should not assume any constness here. Only for global SSBOs. @@ -13433,10 +13951,19 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo if (!var || has_decoration(type.self, DecorationBlock)) readonly = flags.get(DecorationNonWritable); + if (decoration_flags_signal_coherent(flags)) + readonly = false; + addr_space = readonly ? "const device" : "device"; break; } + case StorageClassPhysicalStorageBuffer: + // We cannot fully trust NonWritable coming from glslang due to a bug in buffer_reference handling. + // There isn't much gain in emitting const in C++ languages anyway. 
+ addr_space = "device"; + break; + case StorageClassUniform: case StorageClassUniformConstant: case StorageClassPushConstant: @@ -13525,7 +14052,9 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id, bo addr_space = type.pointer || (argument && type.basetype == SPIRType::ControlPointArray) ? "thread" : ""; } - if (decoration_flags_signal_volatile(flags) && 0 != strcmp(addr_space, "thread")) + if (decoration_flags_signal_coherent(flags) && strcmp(addr_space, "device") == 0) + return join("coherent device"); + else if (decoration_flags_signal_volatile(flags) && strcmp(addr_space, "thread") != 0) return join("volatile ", addr_space); else return addr_space; @@ -15133,19 +15662,24 @@ const char *CompilerMSL::descriptor_address_space(uint32_t id, StorageClass stor string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) { auto &var = get(arg.id); - auto &type = get_variable_data_type(var); auto &var_type = get(arg.type); StorageClass type_storage = var_type.storage; + // Physical pointer types are passed by pointer, not reference. + auto &data_type = get_variable_data_type(var); + bool passed_by_value = is_physical_pointer(var_type); + auto &type = passed_by_value ? var_type : data_type; + // If we need to modify the name of the variable, make sure we use the original variable. // Our alias is just a shadow variable. uint32_t name_id = var.self; if (arg.alias_global_variable && var.basevariable) name_id = var.basevariable; - bool constref = !arg.alias_global_variable && is_pointer(var_type) && arg.write_count == 0; + bool constref = !arg.alias_global_variable && !passed_by_value && is_pointer(var_type) && arg.write_count == 0; // Framebuffer fetch is plain value, const looks out of place, but it is not wrong. - if (type_is_msl_framebuffer_fetch(type)) + // readonly coming from glslang is not reliable in all cases. + if (type_is_msl_framebuffer_fetch(type) || type_storage == StorageClassPhysicalStorageBuffer) constref = false; else if (type_storage == StorageClassUniformConstant) constref = true; @@ -15174,8 +15708,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl = join(cv_qualifier, type_to_glsl(type, arg.id)); - else if (builtin && builtin_type != spv::BuiltInPrimitiveTriangleIndicesEXT && - builtin_type != spv::BuiltInPrimitiveLineIndicesEXT && builtin_type != spv::BuiltInPrimitivePointIndicesEXT) + else if (builtin && !is_mesh_shader()) { // Only use templated array for Clip/Cull distance when feasible. // In other scenarios, we need need to override array length for tess levels (if used as outputs), @@ -15227,7 +15760,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) else { // The type is a pointer type we need to emit cv_qualifier late. - if (is_pointer(type)) + if (is_pointer(data_type)) { decl = type_to_glsl(type, arg.id); if (*cv_qualifier != '\0') @@ -15239,8 +15772,8 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) } } - if (!builtin && !is_pointer(var_type) && - (type_storage == StorageClassFunction || type_storage == StorageClassGeneric)) + if (passed_by_value || (!builtin && !is_pointer(var_type) && + (type_storage == StorageClassFunction || type_storage == StorageClassGeneric))) { // If the argument is a pure value and not an opaque type, we will pass by value. 
if (msl_options.force_native_arrays && is_array(type)) @@ -15341,7 +15874,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) // for the reference has to go before the '&', but after the '*'. if (!address_space.empty()) { - if (is_pointer(type)) + if (is_pointer(data_type)) { if (*cv_qualifier == '\0') decl += ' '; @@ -15350,6 +15883,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) else decl = join(address_space, " ", decl); } + decl += "&"; decl += " "; decl += to_restrict(name_id, true); @@ -15584,6 +16118,9 @@ const std::unordered_set &CompilerMSL::get_illegal_func_names() { static const unordered_set illegal_func_names = { "main", + "fragment", + "vertex", + "kernel", "saturate", "assert", "fmin3", @@ -16091,6 +16628,9 @@ string CompilerMSL::constant_op_expression(const SPIRConstantOp &cop) { switch (cop.opcode) { + case OpSMod: + add_spv_func_and_recompile(SPVFuncImplSMod); + return join("spvSMod(", to_expression(cop.arguments[0]), ", ", to_expression(cop.arguments[1]), ")"); case OpQuantizeToF16: add_spv_func_and_recompile(SPVFuncImplQuantizeToF16); return join("spvQuantizeToF16(", to_expression(cop.arguments[0]), ")"); @@ -16367,6 +16907,12 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb // Otherwise it may be set based on whether the image is read from or written to within the shader. if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) { + auto *p_var = maybe_get_backing_variable(id); + if (p_var && p_var->basevariable) + p_var = maybe_get(p_var->basevariable); + + bool has_access_qualifier = true; + switch (img_type.access) { case AccessQualifierReadOnly: @@ -16383,9 +16929,6 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb default: { - auto *p_var = maybe_get_backing_variable(id); - if (p_var && p_var->basevariable) - p_var = maybe_get(p_var->basevariable); if (p_var && !has_decoration(p_var->self, DecorationNonWritable)) { img_type_name += ", access::"; @@ -16395,9 +16938,21 @@ string CompilerMSL::image_type_glsl(const SPIRType &type, uint32_t id, bool memb img_type_name += "write"; } + else + { + has_access_qualifier = false; + } break; } } + + if (p_var && has_decoration(p_var->self, DecorationCoherent) && msl_options.supports_msl_version(3, 2)) + { + // Cannot declare memory_coherence_device without access qualifier. + if (!has_access_qualifier) + img_type_name += ", access::read"; + img_type_name += ", memory_coherence_device"; + } } img_type_name += ">"; @@ -16465,6 +17020,10 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) if (!msl_options.supports_msl_version(2, 2)) SPIRV_CROSS_THROW("Ballot ops on iOS requires Metal 2.2 and up."); break; + case OpGroupNonUniformRotateKHR: + if (!msl_options.supports_msl_version(2, 2)) + SPIRV_CROSS_THROW("Rotate on iOS requires Metal 2.2 and up."); + break; case OpGroupNonUniformBroadcast: case OpGroupNonUniformShuffle: case OpGroupNonUniformShuffleXor: @@ -16500,13 +17059,16 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) Scope scope; switch (op) { + // These earlier instructions don't have the scope operand. case OpSubgroupBallotKHR: case OpSubgroupFirstInvocationKHR: case OpSubgroupReadInvocationKHR: case OpSubgroupAllKHR: case OpSubgroupAnyKHR: case OpSubgroupAllEqualKHR: - // These earlier instructions don't have the scope operand. + // These instructions are always quad-scoped and thus do not have a scope operand. 
+ case OpGroupNonUniformQuadAllKHR: + case OpGroupNonUniformQuadAnyKHR: scope = ScopeSubgroup; break; default: @@ -16594,6 +17156,23 @@ void CompilerMSL::emit_subgroup_op(const Instruction &i) emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupShuffleDown"); break; + case OpGroupNonUniformRotateKHR: + { + if (i.length > 5) + { + // MSL does not have a cluster size parameter, so calculate the invocation ID manually and using a shuffle. + auto delta_expr = enclose_expression(to_unpacked_expression(ops[op_idx + 1])); + auto cluster_size_minus_one = evaluate_constant_u32(ops[op_idx + 2]) - 1; + auto local_id_expr = to_unpacked_expression(scope == ScopeSubgroup + ? builtin_subgroup_invocation_id_id : builtin_local_invocation_index_id); + auto shuffle_idx = join("((", local_id_expr, " + ", delta_expr, ")", " & ", std::to_string(cluster_size_minus_one), + ") + (", local_id_expr, " & ", std::to_string(~cluster_size_minus_one), ")"); + emit_op(result_type, id, join("spvSubgroupShuffle(", to_unpacked_expression(ops[op_idx]), ", ", shuffle_idx, ")"), false); + } else + emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvSubgroupRotate"); + break; + } + case OpGroupNonUniformAll: case OpSubgroupAllKHR: if (msl_options.use_quadgroup_operation()) @@ -16628,11 +17207,10 @@ case OpGroupNonUniform##op: \ emit_unary_func_op(result_type, id, ops[op_idx], "simd_prefix_exclusive_" #msl_op); \ else if (operation == GroupOperationClusteredReduce) \ { \ - /* Only cluster sizes of 4 are supported. */ \ uint32_t cluster_size = evaluate_constant_u32(ops[op_idx + 1]); \ - if (cluster_size != 4) \ - SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ - emit_unary_func_op(result_type, id, ops[op_idx], "quad_" #msl_op); \ + if (get_execution_model() != ExecutionModelFragment || msl_options.supports_msl_version(2, 2)) \ + add_spv_func_and_recompile(SPVFuncImplSubgroupClustered##op); \ + emit_subgroup_cluster_op(result_type, id, cluster_size, ops[op_idx], #msl_op); \ } \ else \ SPIRV_CROSS_THROW("Invalid group operation."); \ @@ -16657,11 +17235,10 @@ case OpGroupNonUniform##op: \ SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ else if (operation == GroupOperationClusteredReduce) \ { \ - /* Only cluster sizes of 4 are supported. */ \ uint32_t cluster_size = evaluate_constant_u32(ops[op_idx + 1]); \ - if (cluster_size != 4) \ - SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ - emit_unary_func_op(result_type, id, ops[op_idx], "quad_" #msl_op); \ + if (get_execution_model() != ExecutionModelFragment || msl_options.supports_msl_version(2, 2)) \ + add_spv_func_and_recompile(SPVFuncImplSubgroupClustered##op); \ + emit_subgroup_cluster_op(result_type, id, cluster_size, ops[op_idx], #msl_op); \ } \ else \ SPIRV_CROSS_THROW("Invalid group operation."); \ @@ -16680,11 +17257,10 @@ case OpGroupNonUniform##op: \ SPIRV_CROSS_THROW("Metal doesn't support ExclusiveScan for OpGroupNonUniform" #op "."); \ else if (operation == GroupOperationClusteredReduce) \ { \ - /* Only cluster sizes of 4 are supported. 
*/ \ uint32_t cluster_size = evaluate_constant_u32(ops[op_idx + 1]); \ - if (cluster_size != 4) \ - SPIRV_CROSS_THROW("Metal only supports quad ClusteredReduce."); \ - emit_unary_func_op_cast(result_type, id, ops[op_idx], "quad_" #msl_op, type, type); \ + if (get_execution_model() != ExecutionModelFragment || msl_options.supports_msl_version(2, 2)) \ + add_spv_func_and_recompile(SPVFuncImplSubgroupClustered##op); \ + emit_subgroup_cluster_op_cast(result_type, id, cluster_size, ops[op_idx], #msl_op, type, type); \ } \ else \ SPIRV_CROSS_THROW("Invalid group operation."); \ @@ -16700,9 +17276,11 @@ case OpGroupNonUniform##op: \ MSL_GROUP_OP(BitwiseAnd, and) MSL_GROUP_OP(BitwiseOr, or) MSL_GROUP_OP(BitwiseXor, xor) - MSL_GROUP_OP(LogicalAnd, and) - MSL_GROUP_OP(LogicalOr, or) - MSL_GROUP_OP(LogicalXor, xor) + // Metal doesn't support boolean types in SIMD-group operations, so we + // have to emit some casts. + MSL_GROUP_OP_CAST(LogicalAnd, and, SPIRType::UShort) + MSL_GROUP_OP_CAST(LogicalOr, or, SPIRType::UShort) + MSL_GROUP_OP_CAST(LogicalXor, xor, SPIRType::UShort) // clang-format on #undef MSL_GROUP_OP #undef MSL_GROUP_OP_CAST @@ -16715,6 +17293,14 @@ case OpGroupNonUniform##op: \ emit_binary_func_op(result_type, id, ops[op_idx], ops[op_idx + 1], "spvQuadBroadcast"); break; + case OpGroupNonUniformQuadAllKHR: + emit_unary_func_op(result_type, id, ops[op_idx], "quad_all"); + break; + + case OpGroupNonUniformQuadAnyKHR: + emit_unary_func_op(result_type, id, ops[op_idx], "quad_any"); + break; + default: SPIRV_CROSS_THROW("Invalid opcode for subgroup."); } @@ -16722,6 +17308,83 @@ case OpGroupNonUniform##op: \ register_control_dependent_expression(id); } +void CompilerMSL::emit_subgroup_cluster_op(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, + uint32_t op0, const char *op) +{ + if (get_execution_model() == ExecutionModelFragment && !msl_options.supports_msl_version(2, 2)) + { + if (cluster_size == 4) + { + emit_unary_func_op(result_type, result_id, op0, join("quad_", op).c_str()); + return; + } + SPIRV_CROSS_THROW("Cluster sizes other than 4 in fragment shaders require MSL 2.2."); + } + bool forward = should_forward(op0); + emit_op(result_type, result_id, + join("spvClustered_", op, "<", cluster_size, ">(", to_unpacked_expression(op0), ", ", + to_expression(builtin_subgroup_invocation_id_id), ")"), + forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerMSL::emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, + uint32_t op0, const char *op, SPIRType::BaseType input_type, + SPIRType::BaseType expected_result_type) +{ + if (get_execution_model() == ExecutionModelFragment && !msl_options.supports_msl_version(2, 2)) + { + if (cluster_size == 4) + { + emit_unary_func_op_cast(result_type, result_id, op0, join("quad_", op).c_str(), input_type, + expected_result_type); + return; + } + SPIRV_CROSS_THROW("Cluster sizes other than 4 in fragment shaders require MSL 2.2."); + } + + auto &out_type = get(result_type); + auto &expr_type = expression_type(op0); + auto expected_type = out_type; + + // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. 
+ expected_type.basetype = input_type; + expected_type.width = expr_type.width; + + string cast_op; + if (expr_type.basetype != input_type) + { + if (expr_type.basetype == SPIRType::Boolean) + cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")"); + else + cast_op = bitcast_glsl(expected_type, op0); + } + else + cast_op = to_unpacked_expression(op0); + + string sg_op = join("spvClustered_", op, "<", cluster_size, ">"); + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.basetype = expected_result_type; + expected_type.width = out_type.width; + if (out_type.basetype == SPIRType::Boolean) + expr = type_to_glsl(out_type); + else + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(sg_op, "(", cast_op, ", ", to_expression(builtin_subgroup_invocation_id_id), ")"); + expr += ')'; + } + else + { + expr += join(sg_op, "(", cast_op, ", ", to_expression(builtin_subgroup_invocation_id_id), ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} + string CompilerMSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) { if (out_type.basetype == in_type.basetype) @@ -17084,6 +17747,9 @@ string CompilerMSL::builtin_qualifier(BuiltIn builtin) case BuiltInGlobalInvocationId: return "thread_position_in_grid"; + case BuiltInWorkgroupSize: + return "threads_per_threadgroup"; + case BuiltInWorkgroupId: return "threadgroup_position_in_grid"; @@ -17269,6 +17935,7 @@ string CompilerMSL::builtin_type_decl(BuiltIn builtin, uint32_t id) case BuiltInLocalInvocationId: case BuiltInNumWorkgroups: case BuiltInWorkgroupId: + case BuiltInWorkgroupSize: return "uint3"; case BuiltInLocalInvocationIndex: case BuiltInNumSubgroups: @@ -17598,6 +18265,23 @@ void CompilerMSL::analyze_sampled_image_usage() } } +void CompilerMSL::analyze_workgroup_variables() +{ + ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + // If workgroup variables have initializer, it can only be ConstantNull (zero init) + if (var.storage == StorageClassWorkgroup && var.initializer) + { + needs_workgroup_zero_init = true; + + // MSL compiler does not like the routine to initialize simple threadgroup variables, + // falsely claiming it is "sometimes uninitialized". Suppress it. + auto &type = get_variable_data_type(var); + if (type.array.empty() && type.member_types.empty()) + suppress_sometimes_unitialized = true; + } + }); +} + bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *args, uint32_t length) { switch (opcode) @@ -17658,7 +18342,7 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui // suppress_missing_prototypes to suppress compiler warnings of missing function prototypes. // Mark if the input requires the implementation of an SPIR-V function that does not exist in Metal. - SPVFuncImpl spv_func = get_spv_func_impl(opcode, args); + SPVFuncImpl spv_func = get_spv_func_impl(opcode, args, length); if (spv_func != SPVFuncImplNone) { compiler.spv_function_implementations.insert(spv_func); @@ -17765,6 +18449,39 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui needs_subgroup_invocation_id = true; break; + case OpGroupNonUniformRotateKHR: + // Add the correct invocation ID for calculating clustered rotate case. 
+ if (length > 5) + { + if (static_cast(compiler.evaluate_constant_u32(args[2])) == ScopeSubgroup) + needs_subgroup_invocation_id = true; + else + needs_local_invocation_index = true; + } + break; + + case OpGroupNonUniformFAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + if ((compiler.get_execution_model() != ExecutionModelFragment || + compiler.msl_options.supports_msl_version(2, 2)) && + args[3] == GroupOperationClusteredReduce) + needs_subgroup_invocation_id = true; + break; + case OpArrayLength: { auto *var = compiler.maybe_get_backing_variable(args[2]); @@ -17794,7 +18511,8 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui case OpExtInst: { uint32_t extension_set = args[2]; - if (compiler.get(extension_set).ext == SPIRExtension::GLSL) + SPIRExtension::Extension ext = compiler.get(extension_set).ext; + if (ext == SPIRExtension::GLSL) { auto op_450 = static_cast(args[3]); switch (op_450) @@ -17837,6 +18555,12 @@ bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, ui break; } } + else if (ext == SPIRExtension::NonSemanticDebugPrintf) + { + // Operation 1 is printf. + if (args[3] == 1 && !compiler.msl_options.supports_msl_version(3, 2)) + SPIRV_CROSS_THROW("Debug printf requires MSL 3.2."); + } break; } @@ -17867,10 +18591,13 @@ void CompilerMSL::OpCodePreprocessor::check_resource_write(uint32_t var_id) } // Returns an enumeration of a SPIR-V function that needs to be output for certain Op codes. -CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args) +CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op opcode, const uint32_t *args, uint32_t length) { switch (opcode) { + case OpSMod: + return SPVFuncImplSMod; + case OpFMod: return SPVFuncImplMod; @@ -18049,6 +18776,12 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpGroupNonUniformShuffleDown: return SPVFuncImplSubgroupShuffleDown; + case OpGroupNonUniformRotateKHR: + // Clustered rotate is performed using shuffle. + if (length > 5) + return SPVFuncImplSubgroupShuffle; + return SPVFuncImplSubgroupRotate; + case OpGroupNonUniformQuadBroadcast: return SPVFuncImplQuadBroadcast; @@ -18067,6 +18800,10 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o case OpUMulExtended: return SPVFuncImplMulExtended; + case OpAssumeTrueKHR: + case OpExpectKHR: + return SPVFuncImplAssume; + default: break; } @@ -18907,7 +19644,7 @@ void CompilerMSL::analyze_argument_buffers() SetBindingPair pair = { desc_set, binding }; if (resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || - resource.basetype == SPIRType::SampledImage) + resource.basetype == SPIRType::SampledImage || resource.basetype == SPIRType::AccelerationStructure) { // Drop pointer information when we emit the resources into a struct. 
buffer_type.member_types.push_back(get_variable_data_type_id(var)); @@ -18997,7 +19734,7 @@ void CompilerMSL::analyze_argument_buffers() set_extended_member_decoration(buffer_type.self, member_index, SPIRVCrossDecorationOverlappingBinding); member_index++; } - + if (msl_options.replace_recursive_inputs && type_contains_recursion(buffer_type)) { recursive_inputs.insert(type_id); @@ -19157,6 +19894,35 @@ const char *CompilerMSL::get_combined_sampler_suffix() const return sampler_name_suffix.c_str(); } +bool CompilerMSL::specialization_constant_is_macro(uint32_t const_id) const +{ + return constant_macro_ids.find(const_id) != constant_macro_ids.end(); +} + +// Start with all fast math flags enabled, and selectively disable based execution modes and float controls +uint32_t CompilerMSL::get_fp_fast_math_flags(bool incl_ops) +{ + uint32_t fp_flags = ~0; + auto &ep = get_entry_point(); + + if (ep.flags.get(ExecutionModeSignedZeroInfNanPreserve)) + fp_flags &= ~(FPFastMathModeNSZMask | FPFastMathModeNotInfMask | FPFastMathModeNotNaNMask); + + if (ep.flags.get(ExecutionModeContractionOff)) + fp_flags &= ~(FPFastMathModeAllowContractMask); + + for (auto &fp_pair : ep.fp_fast_math_defaults) + if (fp_pair.second) + fp_flags &= get(fp_pair.second).scalar(); + + if (incl_ops) + for (auto &p_m : ir.meta) + if (p_m.second.decoration.decoration_flags.get(DecorationFPFastMathMode)) + fp_flags &= p_m.second.decoration.fp_fast_math_mode; + + return fp_flags; +} + void CompilerMSL::emit_block_hints(const SPIRBlock &) { } @@ -19379,6 +20145,70 @@ void CompilerMSL::emit_mesh_tasks(SPIRBlock &block) statement("return;"); } +void CompilerMSL::emit_workgroup_initialization(const SPIRVariable &var) +{ + auto &type = get_variable_data_type(var); + + begin_scope(); + + if (type.array.empty() && type.member_types.empty()) + { + // For simple shared variables, we just initialize it in thread 0 of the block + // We use short to represent bool for threadgroup variable to workaround compiler bug, + // so we do a temporary fixup here. Alas. 
(see the type_to_glsl method) + bool is_boolean = type.basetype == SPIRType::Boolean; + if (is_boolean) + type.basetype = SPIRType::Short; + + statement("if (gl_LocalInvocationIndex == 0)"); + begin_scope(); + statement(to_name(var.self), " = ", to_initializer_expression(var), ";"); + end_scope(); + + if (is_boolean) + type.basetype = SPIRType::Boolean; + } + else + { + // Otherwise, we use a loop to cooperatively initialize the memory within the group + + // First, we define a few variable names; + string var_name = to_name(var.self); + string var_ptr_name = join(var_name, "_ptr"); + string var_size_name = join(var_name, "_sz"); + string var_pos_name = join(var_name, "_pos"); + string var_stride_name = join(var_name, "_stride"); + string var_ptr2_name = join(var_name, "_ptr2"); + + statement("threadgroup uint *", var_ptr_name, " = (threadgroup uint *)&", var_name, ";"); + statement("uint ", var_size_name, " = ", "sizeof(", var_name, ");"); + statement("uint ", var_pos_name, " = gl_LocalInvocationIndex;"); + statement("uint ", var_stride_name, " = gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z;"); + + statement("while (sizeof(uint) * ", var_pos_name, " < ", var_size_name, ")"); + begin_scope(); + statement(var_ptr_name, "[", var_pos_name, "] = 0u;"); + statement(var_pos_name, " += ", var_stride_name, ";"); + end_scope(); + + statement("if (gl_LocalInvocationIndex == 0)"); + begin_scope(); + statement(var_pos_name, " = (", var_size_name, " / sizeof(uint)) * sizeof(uint);"); + statement("threadgroup uchar *", var_ptr2_name, " = (threadgroup uchar *)&", var_name, ";"); + + statement("while (", var_pos_name, " < ", var_size_name, ")"); + begin_scope(); + statement(var_ptr2_name, "[", var_pos_name, "] = '\\0';"); + statement(var_pos_name, "++;"); + end_scope(); + end_scope(); + } + + statement("threadgroup_barrier(mem_flags::mem_threadgroup);"); + + end_scope(); +} + string CompilerMSL::additional_fixed_sample_mask_str() const { char print_buffer[32]; diff --git a/thirdparty/spirv-cross/spirv_msl.hpp b/thirdparty/spirv-cross/spirv_msl.hpp index 4aaad01a89..99cbb5e138 100644 --- a/thirdparty/spirv-cross/spirv_msl.hpp +++ b/thirdparty/spirv-cross/spirv_msl.hpp @@ -324,6 +324,8 @@ public: // of the shader with the additional fixed sample mask. uint32_t additional_fixed_sample_mask = 0xffffffff; bool enable_point_size_builtin = true; + bool enable_point_size_default = false; + float default_point_size = 1.0f; bool enable_frag_depth_builtin = true; bool enable_frag_stencil_ref_builtin = true; bool disable_rasterization = false; @@ -536,6 +538,14 @@ public: // if the fragment does not modify the depth value. bool input_attachment_is_ds_attachment = false; + // If BuiltInPosition is not written, automatically disable rasterization. + // The result can be queried with get_is_rasterization_disabled. + bool auto_disable_rasterization = false; + + // Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes. + // Requires MSL 3.2 or above, and has no effect with earlier MSL versions. + bool use_fast_math_pragmas = false; + bool is_ios() const { return platform == iOS; @@ -756,6 +766,19 @@ public: void set_combined_sampler_suffix(const char *suffix); const char *get_combined_sampler_suffix() const; + // Information about specialization constants that are translated into MSL macros + // instead of using function constant + // These must only be called after a successful call to CompilerMSL::compile(). 
+ bool specialization_constant_is_macro(uint32_t constant_id) const; + + // Returns a mask of SPIR-V FP Fast Math Mode flags, that represents the set of flags that can be applied + // across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags + // for one floating-point type, and the value returned here is a bitwise-AND combination across all types. + // If incl_ops is enabled, the FPFastMathMode of any SPIR-V operations are also included in the bitwise-AND + // to determine the minimal fast-math that applies to all default execution modes and all operations. + // The returned value is also affected by execution modes SignedZeroInfNanPreserve and ContractionOff. + uint32_t get_fp_fast_math_flags(bool incl_ops); + protected: // An enum of SPIR-V functions that are implemented in additional // source code that is added to the shader if necessary. @@ -763,6 +786,7 @@ protected: { SPVFuncImplNone, SPVFuncImplMod, + SPVFuncImplSMod, SPVFuncImplRadians, SPVFuncImplDegrees, SPVFuncImplFindILsb, @@ -784,12 +808,11 @@ protected: SPVFuncImplInverse4x4, SPVFuncImplInverse3x3, SPVFuncImplInverse2x2, - // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's - // emitted before them. - SPVFuncImplForwardArgs, - // Likewise, this must come before *Swizzle. + // It is very important that this come before *Swizzle, to ensure it's emitted before them. SPVFuncImplGetSwizzle, SPVFuncImplTextureSwizzle, + SPVFuncImplGatherReturn, + SPVFuncImplGatherCompareReturn, SPVFuncImplGatherSwizzle, SPVFuncImplGatherCompareSwizzle, SPVFuncImplGatherConstOffsets, @@ -806,6 +829,30 @@ protected: SPVFuncImplSubgroupShuffleXor, SPVFuncImplSubgroupShuffleUp, SPVFuncImplSubgroupShuffleDown, + SPVFuncImplSubgroupRotate, + SPVFuncImplSubgroupClusteredAdd, + SPVFuncImplSubgroupClusteredFAdd = SPVFuncImplSubgroupClusteredAdd, + SPVFuncImplSubgroupClusteredIAdd = SPVFuncImplSubgroupClusteredAdd, + SPVFuncImplSubgroupClusteredMul, + SPVFuncImplSubgroupClusteredFMul = SPVFuncImplSubgroupClusteredMul, + SPVFuncImplSubgroupClusteredIMul = SPVFuncImplSubgroupClusteredMul, + SPVFuncImplSubgroupClusteredMin, + SPVFuncImplSubgroupClusteredFMin = SPVFuncImplSubgroupClusteredMin, + SPVFuncImplSubgroupClusteredSMin = SPVFuncImplSubgroupClusteredMin, + SPVFuncImplSubgroupClusteredUMin = SPVFuncImplSubgroupClusteredMin, + SPVFuncImplSubgroupClusteredMax, + SPVFuncImplSubgroupClusteredFMax = SPVFuncImplSubgroupClusteredMax, + SPVFuncImplSubgroupClusteredSMax = SPVFuncImplSubgroupClusteredMax, + SPVFuncImplSubgroupClusteredUMax = SPVFuncImplSubgroupClusteredMax, + SPVFuncImplSubgroupClusteredAnd, + SPVFuncImplSubgroupClusteredBitwiseAnd = SPVFuncImplSubgroupClusteredAnd, + SPVFuncImplSubgroupClusteredLogicalAnd = SPVFuncImplSubgroupClusteredAnd, + SPVFuncImplSubgroupClusteredOr, + SPVFuncImplSubgroupClusteredBitwiseOr = SPVFuncImplSubgroupClusteredOr, + SPVFuncImplSubgroupClusteredLogicalOr = SPVFuncImplSubgroupClusteredOr, + SPVFuncImplSubgroupClusteredXor, + SPVFuncImplSubgroupClusteredBitwiseXor = SPVFuncImplSubgroupClusteredXor, + SPVFuncImplSubgroupClusteredLogicalXor = SPVFuncImplSubgroupClusteredXor, SPVFuncImplQuadBroadcast, SPVFuncImplQuadSwap, SPVFuncImplReflectScalar, @@ -841,6 +888,7 @@ protected: SPVFuncImplTextureCast, SPVFuncImplMulExtended, SPVFuncImplSetMeshOutputsEXT, + SPVFuncImplAssume, }; // If the underlying resource has been used for comparison then duplicate loads of that resource must be too @@ -858,6 +906,11 @@ protected: void 
emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_subgroup_op(const Instruction &i) override; + void emit_subgroup_cluster_op(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, uint32_t op0, + const char *op); + void emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, uint32_t op0, + const char *op, SPIRType::BaseType input_type, + SPIRType::BaseType expected_result_type); std::string to_texture_op(const Instruction &i, bool sparse, bool *forward, SmallVector &inherited_expressions) override; void emit_fixup() override; @@ -872,6 +925,7 @@ protected: void emit_mesh_entry_point(); void emit_mesh_outputs(); void emit_mesh_tasks(SPIRBlock &block) override; + void emit_workgroup_initialization(const SPIRVariable &var) override; // Allow Metal to use the array template to make arrays a value type std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override; @@ -985,6 +1039,7 @@ protected: void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var); void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var); + void ensure_struct_members_valid_vecsizes(SPIRType &struct_type, uint32_t &location); void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id); void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type, @@ -1069,7 +1124,8 @@ protected: bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const; std::string get_argument_address_space(const SPIRVariable &argument); std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false); - static bool decoration_flags_signal_volatile(const Bitset &flags); + bool decoration_flags_signal_volatile(const Bitset &flags) const; + bool decoration_flags_signal_coherent(const Bitset &flags) const; const char *to_restrict(uint32_t id, bool space); SPIRType &get_stage_in_struct_type(); SPIRType &get_stage_out_struct_type(); @@ -1082,7 +1138,7 @@ protected: uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0, bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0); const char *get_memory_order(uint32_t spv_mem_sem); - void add_pragma_line(const std::string &line); + void add_pragma_line(const std::string &line, bool recompile_on_unique); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, @@ -1133,12 +1189,13 @@ protected: void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override; void analyze_sampled_image_usage(); + void analyze_workgroup_variables(); bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override; bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage, bool &is_packed) override; void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length); - void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override; + bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override; bool emit_tessellation_access_chain(const 
uint32_t *ops, uint32_t length); bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr); @@ -1164,9 +1221,10 @@ protected: std::unordered_map fragment_output_components; std::unordered_map builtin_to_automatic_input_location; std::unordered_map builtin_to_automatic_output_location; - std::set pragma_lines; - std::set typedef_lines; + std::vector pragma_lines; + std::vector typedef_lines; SmallVector vars_needing_early_declaration; + std::unordered_set constant_macro_ids; std::unordered_map, InternalHasher> resource_bindings; std::unordered_map resource_arg_buff_idx_to_binding_number; @@ -1210,11 +1268,14 @@ protected: bool needs_swizzle_buffer_def = false; bool used_swizzle_buffer = false; bool added_builtin_tess_level = false; + bool needs_local_invocation_index = false; bool needs_subgroup_invocation_id = false; bool needs_subgroup_size = false; bool needs_sample_id = false; bool needs_helper_invocation = false; + bool needs_workgroup_zero_init = false; bool writes_to_depth = false; + bool writes_to_point_size = false; std::string qual_pos_var_name; std::string stage_in_var_name = "in"; std::string stage_out_var_name = "out"; @@ -1276,6 +1337,7 @@ protected: bool suppress_missing_prototypes = false; bool suppress_incompatible_pointer_types_discard_qualifiers = false; + bool suppress_sometimes_unitialized = false; void add_spv_func_and_recompile(SPVFuncImpl spv_func); @@ -1308,7 +1370,7 @@ protected: } bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; - CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args); + CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args, uint32_t length); void check_resource_write(uint32_t var_id); CompilerMSL &compiler; @@ -1319,6 +1381,7 @@ protected: bool uses_image_write = false; bool uses_buffer_write = false; bool uses_discard = false; + bool needs_local_invocation_index = false; bool needs_subgroup_invocation_id = false; bool needs_subgroup_size = false; bool needs_sample_id = false; diff --git a/thirdparty/spirv-cross/spirv_parser.cpp b/thirdparty/spirv-cross/spirv_parser.cpp index 6108dbb653..da1b8a89d1 100644 --- a/thirdparty/spirv-cross/spirv_parser.cpp +++ b/thirdparty/spirv-cross/spirv_parser.cpp @@ -305,6 +305,7 @@ void Parser::parse(const Instruction &instruction) } case OpExtInst: + case OpExtInstWithForwardRefsKHR: { // The SPIR-V debug information extended instructions might come at global scope. 
if (current_block) @@ -380,13 +381,21 @@ void Parser::parse(const Instruction &instruction) auto mode = static_cast(ops[1]); execution.flags.set(mode); - if (mode == ExecutionModeLocalSizeId) + switch (mode) { + case ExecutionModeLocalSizeId: execution.workgroup_size.id_x = ops[2]; execution.workgroup_size.id_y = ops[3]; execution.workgroup_size.id_z = ops[4]; - } + break; + case ExecutionModeFPFastMathDefault: + execution.fp_fast_math_defaults[ops[2]] = ops[3]; + break; + + default: + break; + } break; } @@ -536,12 +545,37 @@ void Parser::parse(const Instruction &instruction) uint32_t id = ops[0]; uint32_t width = ops[1]; auto &type = set(id, op); + + if (width != 16 && width != 8 && length > 2) + SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat."); + if (width == 64) type.basetype = SPIRType::Double; else if (width == 32) type.basetype = SPIRType::Float; else if (width == 16) - type.basetype = SPIRType::Half; + { + if (length > 2) + { + if (ops[2] == spv::FPEncodingBFloat16KHR) + type.basetype = SPIRType::BFloat16; + else + SPIRV_CROSS_THROW("Unrecognized encoding for OpTypeFloat 16."); + } + else + type.basetype = SPIRType::Half; + } + else if (width == 8) + { + if (length < 2) + SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8."); + else if (ops[2] == spv::FPEncodingFloat8E4M3EXT) + type.basetype = SPIRType::FloatE4M3; + else if (ops[2] == spv::FPEncodingFloat8E5M2EXT) + type.basetype = SPIRType::FloatE5M2; + else + SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8."); + } else SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type."); type.width = width; @@ -592,6 +626,22 @@ void Parser::parse(const Instruction &instruction) break; } + case OpTypeCooperativeMatrixKHR: + { + uint32_t id = ops[0]; + auto &base = get(ops[1]); + auto &matrixbase = set(id, base); + + matrixbase.op = op; + matrixbase.cooperative.scope_id = ops[2]; + matrixbase.cooperative.rows_id = ops[3]; + matrixbase.cooperative.columns_id = ops[4]; + matrixbase.cooperative.use_id = ops[5]; + matrixbase.self = id; + matrixbase.parent_type = ops[1]; + break; + } + case OpTypeArray: { uint32_t id = ops[0]; @@ -835,17 +885,27 @@ void Parser::parse(const Instruction &instruction) break; } - // Constants + // Constants case OpSpecConstant: case OpConstant: + case OpConstantCompositeReplicateEXT: + case OpSpecConstantCompositeReplicateEXT: { uint32_t id = ops[1]; auto &type = get(ops[0]); - - if (type.width > 32) - set(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant); + if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT) + { + auto subconstant = uint32_t(ops[2]); + set(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true); + } else - set(id, ops[0], ops[2], op == OpSpecConstant); + { + + if (type.width > 32) + set(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant); + else + set(id, ops[0], ops[2], op == OpSpecConstant); + } break; }
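// The wide-constant path above reassembles a 64-bit literal from the two instruction
// words, low word first, as SPIR-V stores them. A standalone sketch of that
// reconstruction (hypothetical helper name, shown only to make the word order explicit):

#include <cstdint>

static inline uint64_t assemble_wide_literal(uint32_t low_word, uint32_t high_word)
{
    // SPIR-V encodes literals wider than 32 bits as consecutive words, low word first.
    return uint64_t(low_word) | (uint64_t(high_word) << 32);
}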