diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml
index 3ab4aa107d..427c57926c 100644
--- a/doc/classes/RenderingDevice.xml
+++ b/doc/classes/RenderingDevice.xml
@@ -2490,6 +2490,9 @@
Features support for buffer device address extension.
+
+ Support for 32-bit image atomic operations.
+
Maximum number of uniform sets that can be bound at a given time.
diff --git a/drivers/apple/foundation_helpers.h b/drivers/apple/foundation_helpers.h
new file mode 100644
index 0000000000..db87fba96c
--- /dev/null
+++ b/drivers/apple/foundation_helpers.h
@@ -0,0 +1,56 @@
+/**************************************************************************/
+/* foundation_helpers.h */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#pragma once
+
+#import <Foundation/Foundation.h>
+
+class String;
+template <typename T>
+class CharStringT;
+
+using CharString = CharStringT<char>;
+
+namespace conv {
+
+/**
+ * Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer.
+ */
+NSString *to_nsstring(const String &p_str);
+/**
+ * Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer.
+ */
+NSString *to_nsstring(const CharString &p_str);
+/**
+ * Converts an NSString to a Godot String without allocating intermediate buffers.
+ */
+String to_string(NSString *p_str);
+
+} //namespace conv
diff --git a/drivers/apple/foundation_helpers.mm b/drivers/apple/foundation_helpers.mm
new file mode 100644
index 0000000000..0453011b1d
--- /dev/null
+++ b/drivers/apple/foundation_helpers.mm
@@ -0,0 +1,85 @@
+/**************************************************************************/
+/* foundation_helpers.mm */
+/**************************************************************************/
+/* This file is part of: */
+/* GODOT ENGINE */
+/* https://godotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#import "foundation_helpers.h"
+
+#import "core/string/ustring.h"
+
+#import <CoreFoundation/CoreFoundation.h>
+
+namespace conv {
+
+NSString *to_nsstring(const String &p_str) {
+ return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
+ length:p_str.length() * sizeof(char32_t)
+ encoding:NSUTF32LittleEndianStringEncoding];
+}
+
+NSString *to_nsstring(const CharString &p_str) {
+ return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
+ length:p_str.length()
+ encoding:NSUTF8StringEncoding];
+}
+
+String to_string(NSString *p_str) {
+ CFStringRef str = (__bridge CFStringRef)p_str;
+ CFStringEncoding fastest = CFStringGetFastestEncoding(str);
+ // Sometimes, CFString will return a pointer to its encoded data,
+ // so we can create the string without allocating intermediate buffers.
+ const char *p = CFStringGetCStringPtr(str, fastest);
+ if (p) {
+ switch (fastest) {
+ case kCFStringEncodingASCII:
+ return String::ascii(Span(p, CFStringGetLength(str)));
+ case kCFStringEncodingUTF8:
+ return String::utf8(p);
+ case kCFStringEncodingUTF32LE:
+ return String::utf32(Span((char32_t *)p, CFStringGetLength(str)));
+ default:
+ break;
+ }
+ }
+
+ CFRange range = CFRangeMake(0, CFStringGetLength(str));
+ CFIndex byte_len = 0;
+ // Try to losslessly convert the string directly into a String's buffer to avoid intermediate allocations.
+ CFIndex n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, nil, 0, &byte_len);
+ if (n == range.length) {
+ String res;
+ res.resize_uninitialized((byte_len / sizeof(char32_t)) + 1);
+ res[n] = 0;
+ n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, (UInt8 *)res.ptrw(), res.length() * sizeof(char32_t), nil);
+ return res;
+ }
+
+ return String::utf8(p_str.UTF8String);
+}
+
+} //namespace conv
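
A quick round-trip illustration of how these helpers compose (a sketch; the string literal is hypothetical, but the label assignment in compute_pipeline_create later in this diff uses conv::to_nsstring the same way):

    // No intermediate UTF-8 buffer on the way in; on the way out,
    // CFStringGetCStringPtr lets to_string() avoid copies when CFString
    // exposes its internal storage.
    String s = U"Fog density kernel";
    NSString *ns = conv::to_nsstring(s);
    String back = conv::to_string(ns);
    DEV_ASSERT(s == back);
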
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp
index a91eee3050..f454291a1b 100644
--- a/drivers/d3d12/rendering_device_driver_d3d12.cpp
+++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp
@@ -5586,6 +5586,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
return true;
case SUPPORTS_BUFFER_DEVICE_ADDRESS:
return true;
+ case SUPPORTS_IMAGE_ATOMIC_32_BIT:
+ return true;
default:
return false;
}
diff --git a/drivers/metal/SCsub b/drivers/metal/SCsub
index a4c1c65b82..f55933a2c3 100644
--- a/drivers/metal/SCsub
+++ b/drivers/metal/SCsub
@@ -12,7 +12,6 @@ thirdparty_obj = []
thirdparty_dir = "#thirdparty/spirv-cross/"
thirdparty_sources = [
"spirv_cfg.cpp",
- "spirv_cross_util.cpp",
"spirv_cross.cpp",
"spirv_parser.cpp",
"spirv_msl.cpp",
diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h
index 720efd64e1..24a1a4cdf9 100644
--- a/drivers/metal/metal_device_properties.h
+++ b/drivers/metal/metal_device_properties.h
@@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
+ bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */
+ bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported. */
};
struct MetalLimits {
diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm
index 43946ede6e..4b06e24ad3 100644
--- a/drivers/metal/metal_device_properties.mm
+++ b/drivers/metal/metal_device_properties.mm
@@ -121,6 +121,12 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
features.argument_buffers_tier = p_device.argumentBuffersSupport;
+ features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
+ features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8];
+ if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
+ features.supports_image_atomic_32_bit = false;
+ features.supports_image_atomic_64_bit = false;
+ }
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);
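
For reference, MTLGPUFamilyApple6 corresponds to A13-class GPUs and MTLGPUFamilyApple8 to A15/M2-class GPUs, so 32-bit image atomics are native on most recent Apple hardware while 64-bit support remains rarer. The environment variable is a debugging escape hatch that forces the fallback path. A sketch of how a caller can branch on the resulting flags:

    // Sketch only: pick the atomic strategy once, based on the flags set above.
    const MetalFeatures &f = device_properties->features;
    if (f.supports_image_atomic_32_bit) {
        // Native imageAtomic* on R32_UINT / R32_SINT storage textures.
    } else {
        // Storage-buffer emulation (see texture_create later in this diff).
    }
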
diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h
index b89d4ba2e0..4826ded95d 100644
--- a/drivers/metal/metal_objects.h
+++ b/drivers/metal/metal_objects.h
@@ -309,9 +309,23 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
private:
+#pragma mark - Common State
+
+ // From RenderingDevice
+ static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
+
RenderingDeviceDriverMetal *device_driver = nullptr;
id<MTLCommandQueue> queue = nil;
id<MTLCommandBuffer> commandBuffer = nil;
+ bool state_begin = false;
+
+ _FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
+ DEV_ASSERT(state_begin);
+ if (commandBuffer == nil) {
+ commandBuffer = queue.commandBuffer;
+ }
+ return commandBuffer;
+ }
void _end_compute_dispatch();
void _end_blit();
@@ -326,6 +340,11 @@ private:
void _end_render_pass();
void _render_clear_render_area();
+#pragma mark - Compute
+
+ void _compute_set_dirty_state();
+ void _compute_bind_uniform_sets();
+
public:
MDCommandBufferStateType type = MDCommandBufferStateType::None;
@@ -349,18 +368,18 @@ public:
LocalVector<NSUInteger> vertex_offsets;
ResourceUsageMap resource_usage;
// clang-format off
- enum DirtyFlag: uint8_t {
- DIRTY_NONE = 0b0000'0000,
- DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
- DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
- DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
- DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
- DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
- DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
- DIRTY_BLEND = 0b0100'0000, //! blend state
- DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode
-
- DIRTY_ALL = 0xff,
+ enum DirtyFlag: uint16_t {
+ DIRTY_NONE = 0,
+ DIRTY_PIPELINE = 1 << 0, //! pipeline state
+ DIRTY_UNIFORMS = 1 << 1, //! uniform sets
+ DIRTY_PUSH = 1 << 2, //! push constants
+ DIRTY_DEPTH = 1 << 3, //! depth / stencil state
+ DIRTY_VERTEX = 1 << 4, //! vertex buffers
+ DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
+ DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
+ DIRTY_BLEND = 1 << 7, //! blend state
+ DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
+ DIRTY_ALL = (1 << 9) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
@@ -368,6 +387,9 @@ public:
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
+ uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
+ uint32_t push_constant_data_len = 0;
+ uint32_t push_constant_bindings[2] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
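
The 128-byte capture buffer matches the push-constant size RenderingDevice guarantees across drivers, and it stays far below the 4 KiB ceiling Metal documents for setBytes:. A compile-time guard along these lines would be cheap insurance (my sketch, not part of the PR):

    // setBytes: is documented for inline data up to 4 KiB; the capture buffer
    // must stay under that, and 128 bytes mirrors RenderingDevice's contract.
    static_assert(MAX_PUSH_CONSTANT_SIZE <= 4096, "push constants must fit setBytes:");
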
@@ -422,6 +444,13 @@ public:
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
+ _FORCE_INLINE_ void mark_push_constants_dirty() {
+ if (push_constant_data_len == 0) {
+ return;
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_PUSH);
+ }
+
_FORCE_INLINE_ void mark_blend_dirty() {
if (!blend_constants.has_value()) {
return;
@@ -464,16 +493,46 @@ public:
MDComputePipeline *pipeline = nullptr;
id<MTLComputeCommandEncoder> encoder = nil;
ResourceUsageMap resource_usage;
- _FORCE_INLINE_ void reset() {
- pipeline = nil;
- encoder = nil;
- // Keep the keys, as they are likely to be used again.
- for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
- kv.value.clear();
+ // clang-format off
+ enum DirtyFlag: uint16_t {
+ DIRTY_NONE = 0,
+ DIRTY_PIPELINE = 1 << 0, //! pipeline state
+ DIRTY_UNIFORMS = 1 << 1, //! uniform sets
+ DIRTY_PUSH = 1 << 2, //! push constants
+ DIRTY_ALL = (1 << 3) - 1,
+ };
+ // clang-format on
+ BitField<DirtyFlag> dirty = DIRTY_NONE;
+
+ LocalVector<MDUniformSet *> uniform_sets;
+ // Bit mask of the uniform sets that are dirty, to prevent redundant binding.
+ uint64_t uniform_set_mask = 0;
+ uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
+ uint32_t push_constant_data_len = 0;
+ uint32_t push_constant_bindings[1] = { 0 };
+
+ _FORCE_INLINE_ void reset();
+ void end_encoding();
+
+ _FORCE_INLINE_ void mark_uniforms_dirty(void) {
+ if (uniform_sets.is_empty()) {
+ return;
}
+ for (uint32_t i = 0; i < uniform_sets.size(); i++) {
+ if (uniform_sets[i] != nullptr) {
+ uniform_set_mask |= 1ULL << i;
+ }
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
+ }
+
+ _FORCE_INLINE_ void mark_push_constants_dirty() {
+ if (push_constant_data_len == 0) {
+ return;
+ }
+ dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
- void end_encoding();
} compute;
// State specific to a blit pass.
@@ -496,6 +555,7 @@ public:
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
void bind_pipeline(RDD::PipelineID p_pipeline);
+ void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
#pragma mark - Render Commands
@@ -661,8 +721,6 @@ public:
Vector<UniformSet> sets;
bool uses_argument_buffers = true;
- virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
-
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
virtual ~MDShader() = default;
@@ -671,15 +729,13 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
struct {
- uint32_t binding = -1;
+ int32_t binding = -1;
uint32_t size = 0;
} push_constants;
MTLSize local = {};
MDLibrary *kernel;
- void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final;
-
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};
@@ -700,8 +756,6 @@ public:
MDLibrary *vert;
MDLibrary *frag;
- void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final;
-
MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm
index f8056e217b..96ade55c1b 100644
--- a/drivers/metal/metal_objects.mm
+++ b/drivers/metal/metal_objects.mm
@@ -62,8 +62,8 @@
#undef MAX
void MDCommandBuffer::begin() {
- DEV_ASSERT(commandBuffer == nil);
- commandBuffer = queue.commandBuffer;
+ DEV_ASSERT(commandBuffer == nil && !state_begin);
+ state_begin = true;
}
void MDCommandBuffer::end() {
@@ -83,6 +83,7 @@ void MDCommandBuffer::commit() {
end();
[commandBuffer commit];
commandBuffer = nil;
+ state_begin = false;
}
void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
@@ -136,7 +137,7 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
render.desc.colorAttachments[0].resolveTexture = res_tex;
}
#endif
- render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc];
+ render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc];
}
if (render.pipeline != rp) {
@@ -160,9 +161,44 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
DEV_ASSERT(type == MDCommandBufferStateType::None);
type = MDCommandBufferStateType::Compute;
- compute.pipeline = (MDComputePipeline *)p;
- compute.encoder = commandBuffer.computeCommandEncoder;
- [compute.encoder setComputePipelineState:compute.pipeline->state];
+ if (compute.pipeline != p) {
+ compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE);
+ compute.mark_uniforms_dirty();
+ compute.pipeline = (MDComputePipeline *)p;
+ }
+ }
+}
+
+void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {
+ switch (type) {
+ case MDCommandBufferStateType::Render: {
+ MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
+ if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) {
+ return;
+ }
+ render.push_constant_bindings[0] = shader->push_constants.vert.binding;
+ render.push_constant_bindings[1] = shader->push_constants.frag.binding;
+ void const *ptr = p_data.ptr();
+ render.push_constant_data_len = p_data.size() * sizeof(uint32_t);
+ DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data));
+ memcpy(render.push_constant_data, ptr, render.push_constant_data_len);
+ render.mark_push_constants_dirty();
+ } break;
+ case MDCommandBufferStateType::Compute: {
+ MDComputeShader *shader = (MDComputeShader *)(p_shader.id);
+ if (shader->push_constants.binding == -1) {
+ return;
+ }
+ compute.push_constant_bindings[0] = shader->push_constants.binding;
+ void const *ptr = p_data.ptr();
+ compute.push_constant_data_len = p_data.size() * sizeof(uint32_t);
+ DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data));
+ memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len);
+ compute.mark_push_constants_dirty();
+ } break;
+ case MDCommandBufferStateType::Blit:
+ case MDCommandBufferStateType::None:
+ return;
}
}
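
The net effect of this refactor: push constants are captured into command-buffer state instead of being encoded immediately, so they can be bound before any encoder exists and survive encoder restarts. A sketch of the flow (command_bind_push_constants, shown later in this diff, is the real entry point):

    // Capture now; the bytes are replayed lazily at draw/dispatch time by
    // _render_set_dirty_state() / _compute_set_dirty_state() via
    // setVertexBytes: / setFragmentBytes: / setBytes: at the reflected bindings.
    cb->encode_push_constant_data(shader_id, data); // memcpy + DIRTY_PUSH
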
@@ -181,7 +217,7 @@ id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() {
}
type = MDCommandBufferStateType::Blit;
- blit.encoder = commandBuffer.blitCommandEncoder;
+ blit.encoder = command_buffer().blitCommandEncoder;
return blit.encoder;
}
@@ -200,7 +236,7 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc
break;
}
- id<MTLRenderCommandEncoder> enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc];
+ id<MTLRenderCommandEncoder> enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc];
if (p_label != nil) {
[enc pushDebugGroup:p_label];
[enc popDebugGroup];
@@ -344,6 +380,19 @@ void MDCommandBuffer::render_clear_attachments(VectorView
void MDCommandBuffer::_render_set_dirty_state() {
_render_bind_uniform_sets();
+ if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) {
+ if (render.push_constant_bindings[0] != (uint32_t)-1) {
+ [render.encoder setVertexBytes:render.push_constant_data
+ length:render.push_constant_data_len
+ atIndex:render.push_constant_bindings[0]];
+ }
+ if (render.push_constant_bindings[1] != (uint32_t)-1) {
+ [render.encoder setFragmentBytes:render.push_constant_data
+ length:render.push_constant_data_len
+ atIndex:render.push_constant_bindings[1]];
+ }
+ }
+
MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
uint32_t view_range[2] = { 0, subpass.view_count };
@@ -552,7 +601,7 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t
}
void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
- DEV_ASSERT(commandBuffer != nil);
+ DEV_ASSERT(command_buffer() != nil);
end();
MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
@@ -639,7 +688,7 @@ void MDCommandBuffer::_render_clear_render_area() {
}
void MDCommandBuffer::render_next_subpass() {
- DEV_ASSERT(commandBuffer != nil);
+ DEV_ASSERT(command_buffer() != nil);
if (render.current_subpass == UINT32_MAX) {
render.current_subpass = 0;
@@ -726,7 +775,7 @@ void MDCommandBuffer::render_next_subpass() {
// the defaultRasterSampleCount from the pipeline's sample count.
render.desc = desc;
} else {
- render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc];
+ render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc];
if (!render.is_rendering_entire_area) {
_render_clear_render_area();
@@ -895,6 +944,7 @@ void MDCommandBuffer::RenderState::reset() {
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
+ push_constant_data_len = 0;
clear_values.clear();
viewports.clear();
scissors.clear();
@@ -960,29 +1010,108 @@ void MDCommandBuffer::ComputeState::end_encoding() {
#pragma mark - Compute
+void MDCommandBuffer::_compute_set_dirty_state() {
+ if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) {
+ compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent];
+ [compute.encoder setComputePipelineState:compute.pipeline->state];
+ }
+
+ _compute_bind_uniform_sets();
+
+ if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) {
+ if (compute.push_constant_bindings[0] != (uint32_t)-1) {
+ [compute.encoder setBytes:compute.push_constant_data
+ length:compute.push_constant_data_len
+ atIndex:compute.push_constant_bindings[0]];
+ }
+ }
+
+ compute.dirty.clear();
+}
+
+void MDCommandBuffer::_compute_bind_uniform_sets() {
+ DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+ if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) {
+ return;
+ }
+
+ compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS);
+ uint64_t set_uniforms = compute.uniform_set_mask;
+ compute.uniform_set_mask = 0;
+
+ MDComputeShader *shader = compute.pipeline->shader;
+
+ while (set_uniforms != 0) {
+ // Find the index of the next set bit.
+ uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
+ // Clear the set bit.
+ set_uniforms &= (set_uniforms - 1);
+ MDUniformSet *set = compute.uniform_sets[index];
+ if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
+ continue;
+ }
+ set->bind_uniforms(shader, compute, index);
+ }
+}
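+
+// The loop above touches only the dirty sets using two standard bit tricks;
+// a standalone illustration (sketch, not part of the PR):
+//
+//     // __builtin_ctzll returns the index of the lowest set bit, and
+//     // mask &= (mask - 1) clears exactly that bit, so each dirty set
+//     // is visited once, in ascending index order.
+//     uint64_t mask = 0b101000; // sets 3 and 5 are dirty
+//     while (mask != 0) {
+//         uint32_t index = (uint32_t)__builtin_ctzll(mask); // 3, then 5
+//         mask &= (mask - 1);
+//         // set->bind_uniforms(shader, compute, index);
+//     }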
+
+void MDCommandBuffer::ComputeState::reset() {
+ pipeline = nil;
+ encoder = nil;
+ dirty = DIRTY_NONE;
+ uniform_sets.clear();
+ uniform_set_mask = 0;
+ push_constant_data_len = 0;
+ // Keep the keys, as they are likely to be used again.
+ for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
+ kv.value.clear();
+ }
+}
+
void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
- MDShader *shader = (MDShader *)(p_shader.id);
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
- set->bind_uniforms(shader, compute, p_set_index);
+ if (compute.uniform_sets.size() <= p_set_index) {
+ uint32_t s = compute.uniform_sets.size();
+ compute.uniform_sets.resize(p_set_index + 1);
+ // Set intermediate values to null.
+ std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr);
+ }
+
+ if (compute.uniform_sets[p_set_index] != set) {
+ compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
+ compute.uniform_set_mask |= 1ULL << p_set_index;
+ compute.uniform_sets[p_set_index] = set;
+ }
}
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
- MDShader *shader = (MDShader *)(p_shader.id);
-
- // TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
- for (size_t i = 0u; i < p_set_count; ++i) {
+ for (size_t i = 0; i < p_set_count; ++i) {
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
- set->bind_uniforms(shader, compute, p_first_set_index + i);
+
+ uint32_t index = p_first_set_index + i;
+ if (compute.uniform_sets.size() <= index) {
+ uint32_t s = compute.uniform_sets.size();
+ compute.uniform_sets.resize(index + 1);
+ // Set intermediate values to null.
+ std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr);
+ }
+
+ if (compute.uniform_sets[index] != set) {
+ compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
+ compute.uniform_set_mask |= 1ULL << index;
+ compute.uniform_sets[index] = set;
+ }
}
}
void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+ _compute_set_dirty_state();
+
MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups);
id<MTLComputeCommandEncoder> enc = compute.encoder;
@@ -992,6 +1121,8 @@ void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups,
void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
+ _compute_set_dirty_state();
+
id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer);
id<MTLComputeCommandEncoder> enc = compute.encoder;
@@ -1021,20 +1152,6 @@ MDComputeShader::MDComputeShader(CharString p_name,
MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
}
-void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
- DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute);
- if (push_constants.binding == (uint32_t)-1) {
- return;
- }
-
- id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder;
-
- void const *ptr = p_data.ptr();
- size_t length = p_data.size() * sizeof(uint32_t);
-
- [enc setBytes:ptr length:length atIndex:push_constants.binding];
-}
-
MDRenderShader::MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
@@ -1046,22 +1163,6 @@ MDRenderShader::MDRenderShader(CharString p_name,
frag(p_frag) {
}
-void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
- DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
- id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;
-
- void const *ptr = p_data.ptr();
- size_t length = p_data.size() * sizeof(uint32_t);
-
- if (push_constants.vert.binding > -1) {
- [enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding];
- }
-
- if (push_constants.frag.binding > -1) {
- [enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding];
- }
-}
-
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);
diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm
index ed450b8bf8..e099e28a39 100644
--- a/drivers/metal/rendering_device_driver_metal.mm
+++ b/drivers/metal/rendering_device_driver_metal.mm
@@ -58,6 +58,7 @@
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
+#include "drivers/apple/foundation_helpers.h"
#import
#import
@@ -317,12 +318,6 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderWrite;
}
- if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
- if (format_caps & kMTLFmtCapsAtomic) {
- desc.usage |= MTLTextureUsageShaderAtomic;
- }
- }
-
bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));
if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
@@ -334,6 +329,18 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderRead;
}
+ if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
+ if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
+ if (format_caps & kMTLFmtCapsAtomic) {
+ desc.usage |= MTLTextureUsageShaderAtomic;
+ } else {
+ ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic operations on this texture format are not supported.");
+ }
+ } else {
+ ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic texture operations not supported on this OS version.");
+ }
+ }
+
if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
}
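
Callers must now opt in explicitly: a texture destined for imageAtomic* has to carry the atomic usage bit so the driver can request MTLTextureUsageShaderAtomic up front, or fail fast when the format cannot support it. A minimal request, mirroring the fog changes later in this diff:

    // Sketch: requesting an atomic-capable storage texture through RD.
    RD::TextureFormat tf;
    tf.format = RD::DATA_FORMAT_R32_UINT;
    tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT;
    RID tex = RD::get_singleton()->texture_create(tf, RD::TextureView());
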
@@ -363,7 +370,29 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
// Check if it is a linear format for atomic operations and therefore needs a buffer,
// as generally Metal does not support atomic operations on textures.
- bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT || p_format.format == DATA_FORMAT_R32G32_UINT || p_format.format == DATA_FORMAT_R32G32_SINT));
+ bool needs_buffer = is_linear;
+
+ // Check for atomic requirements.
+ if (flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D) {
+ switch (p_format.format) {
+ case RenderingDeviceCommons::DATA_FORMAT_R32_SINT:
+ case RenderingDeviceCommons::DATA_FORMAT_R32_UINT: {
+ if (!device_properties->features.supports_image_atomic_32_bit) {
+ // We can emulate 32-bit atomic operations on textures.
+ needs_buffer = true;
+ }
+ } break;
+ case RenderingDeviceCommons::DATA_FORMAT_R32G32_SINT:
+ case RenderingDeviceCommons::DATA_FORMAT_R32G32_UINT: {
+ if (!device_properties->features.supports_image_atomic_64_bit) {
+ // No emulation for 64-bit atomics.
+ ERR_FAIL_V_MSG(TextureID(), "64-bit atomic operations are not supported.");
+ }
+ } break;
+ default:
+ break;
+ }
+ }
id<MTLTexture> obj = nil;
if (needs_buffer) {
@@ -900,9 +929,15 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue
MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id);
Fence *fence = (Fence *)(p_cmd_fence.id);
if (fence != nullptr) {
- [cmd_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
+ id<MTLCommandBuffer> cb = cmd_buffer->get_command_buffer();
+ if (cb == nil) {
+ // If there is nothing to do, signal the fence immediately.
dispatch_semaphore_signal(fence->semaphore);
- }];
+ } else {
+ [cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
+ dispatch_semaphore_signal(fence->semaphore);
+ }];
+ }
}
for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
@@ -1730,8 +1765,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {
void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
- MDShader *shader = (MDShader *)(p_shader.id);
- shader->encode_push_constant_data(p_data, cb);
+ cb->encode_push_constant_data(p_shader, p_data);
}
// ----- CACHE -----
@@ -2417,6 +2451,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s
MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new];
desc.computeFunction = function;
+ desc.label = conv::to_nsstring(shader->name);
if (archive) {
desc.binaryArchives = @[ archive ];
}
@@ -2735,6 +2770,8 @@ bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
return device_properties->features.metal_fx_spatial;
case SUPPORTS_METALFX_TEMPORAL:
return device_properties->features.metal_fx_temporal;
+ case SUPPORTS_IMAGE_ATOMIC_32_BIT:
+ return device_properties->features.supports_image_atomic_32_bit;
default:
return false;
}
diff --git a/drivers/metal/rendering_shader_container_metal.mm b/drivers/metal/rendering_shader_container_metal.mm
index c2e4518a06..d9c81dba6f 100644
--- a/drivers/metal/rendering_shader_container_metal.mm
+++ b/drivers/metal/rendering_shader_container_metal.mm
@@ -199,6 +199,8 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<Rendering
msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
+ mtl_reflection_data.msl_version = msl_options.msl_version;
msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
@@ -209,7 +211,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vectorget_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
+ if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
disable_argument_buffers = true;
}
@@ -236,6 +238,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<Rendering
+ if (msl_options.msl_version >= CompilerMSL::Options::make_msl_version(3, 2)) {
+ // All 3.2+ versions support device coherence, so we can disable texture fences.
+ msl_options.readwrite_texture_fences = false;
+ }
CompilerGLSL::Options options{};
options.vertex.flip_vert_y = true;
@@ -417,6 +423,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<Rendering
diff --git a/servers/rendering/renderer_rd/environment/fog.cpp b/servers/rendering/renderer_rd/environment/fog.cpp
--- a/servers/rendering/renderer_rd/environment/fog.cpp
+++ b/servers/rendering/renderer_rd/environment/fog.cpp
+ atomic_type = RD::get_singleton()->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT) ? RD::UNIFORM_TYPE_IMAGE : RD::UNIFORM_TYPE_STORAGE_BUFFER;
RD::TextureFormat tf;
tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
@@ -440,29 +441,29 @@ void Fog::VolumetricFog::init(const Vector3i &fog_size, RID p_sky_shader) {
fog_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(fog_map, "Fog map");
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- Vector<uint8_t> dm;
- dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4);
+ if (atomic_type == RD::UNIFORM_TYPE_STORAGE_BUFFER) {
+ Vector<uint8_t> dm;
+ dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4);
- density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
- RD::get_singleton()->set_resource_name(density_map, "Fog density map");
- light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
- RD::get_singleton()->set_resource_name(light_map, "Fog light map");
- emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
- RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
-#else
- tf.format = RD::DATA_FORMAT_R32_UINT;
- tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
- density_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
- RD::get_singleton()->set_resource_name(density_map, "Fog density map");
- RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
- light_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
- RD::get_singleton()->set_resource_name(light_map, "Fog light map");
- RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
- emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
- RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
- RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
-#endif
+ density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
+ RD::get_singleton()->set_resource_name(density_map, "Fog density map");
+ light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
+ RD::get_singleton()->set_resource_name(light_map, "Fog light map");
+ emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
+ RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
+ } else {
+ tf.format = RD::DATA_FORMAT_R32_UINT;
+ tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT;
+ density_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
+ RD::get_singleton()->set_resource_name(density_map, "Fog density map");
+ RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
+ light_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
+ RD::get_singleton()->set_resource_name(light_map, "Fog light map");
+ RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
+ emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
+ RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
+ RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
+ }
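
Both branches allocate the same logical storage, one 32-bit cell per froxel: the buffer path just makes the layout explicit, while the texture path now requests the atomic usage bit. As a sanity check on sizes (a sketch; the 128-cube volume is an arbitrary example):

    // One 32-bit cell per froxel in either representation.
    Vector3i fog_size(128, 128, 128);
    uint64_t bytes = (uint64_t)fog_size.x * fog_size.y * fog_size.z * 4; // 8 MiB per map
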
Vector<RD::Uniform> uniforms;
{
@@ -579,11 +580,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-#else
- u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
-#endif
+ u.uniform_type = fog->atomic_type;
u.binding = 1;
u.append_id(fog->emissive_map);
uniforms.push_back(u);
@@ -599,11 +596,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-#else
- u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
-#endif
+ u.uniform_type = fog->atomic_type;
u.binding = 3;
u.append_id(fog->density_map);
uniforms.push_back(u);
@@ -611,11 +604,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-#else
- u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
-#endif
+ u.uniform_type = fog->atomic_type;
u.binding = 4;
u.append_id(fog->light_map);
uniforms.push_back(u);
@@ -918,22 +907,14 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
}
{
RD::Uniform u;
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-#else
- u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
-#endif
+ u.uniform_type = fog->atomic_type;
u.binding = 16;
u.append_id(fog->density_map);
uniforms.push_back(u);
}
{
RD::Uniform u;
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-#else
- u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
-#endif
+ u.uniform_type = fog->atomic_type;
u.binding = 17;
u.append_id(fog->light_map);
uniforms.push_back(u);
@@ -941,11 +922,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
-#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
- u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-#else
- u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
-#endif
+ u.uniform_type = fog->atomic_type;
u.binding = 18;
u.append_id(fog->emissive_map);
uniforms.push_back(u);
diff --git a/servers/rendering/renderer_rd/environment/fog.h b/servers/rendering/renderer_rd/environment/fog.h
index 6e91b4bcf4..f593b3310f 100644
--- a/servers/rendering/renderer_rd/environment/fog.h
+++ b/servers/rendering/renderer_rd/environment/fog.h
@@ -316,6 +316,9 @@ public:
int last_shadow_filter = -1;
+ // If the device doesn't support image atomics, use storage buffers instead.
+ RD::UniformType atomic_type = RD::UNIFORM_TYPE_IMAGE;
+
virtual void configure(RenderSceneBuffersRD *p_render_buffers) override {}
virtual void free_data() override {}
diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp
index 49ebbcdaf7..ed8ecdc4b1 100644
--- a/servers/rendering/renderer_rd/shader_rd.cpp
+++ b/servers/rendering/renderer_rd/shader_rd.cpp
@@ -234,11 +234,13 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c
builder.append(String("#define ") + String(E.key) + "_CODE_USED\n");
}
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
- if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
+ RenderingDevice *rd = RD::get_singleton();
+ if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
builder.append("#define MOLTENVK_USED\n");
}
- // Image atomics are supported on Metal 3.1 but no support in MoltenVK or SPIRV-Cross yet.
- builder.append("#define NO_IMAGE_ATOMICS\n");
+ if (!rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) {
+ builder.append("#define NO_IMAGE_ATOMICS\n");
+ }
#endif
builder.append(String("#define RENDER_DRIVER_") + OS::get_singleton()->get_current_rendering_driver_name().to_upper() + "\n");
diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl
index 929f1e34df..4ca666019a 100644
--- a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl
+++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog.glsl
@@ -2,6 +2,8 @@
#version 450
+#pragma use_vulkan_memory_model
+
#VERSION_DEFINES
layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl
index 832058553e..17ee5ced28 100644
--- a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl
+++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl
@@ -2,6 +2,8 @@
#version 450
+#pragma use_vulkan_memory_model
+
#VERSION_DEFINES
#ifdef MODE_DENSITY
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index ddf6f91d25..0030c8674d 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -7979,6 +7979,7 @@ void RenderingDevice::_bind_methods() {
BIND_ENUM_CONSTANT(SUPPORTS_METALFX_SPATIAL);
BIND_ENUM_CONSTANT(SUPPORTS_METALFX_TEMPORAL);
BIND_ENUM_CONSTANT(SUPPORTS_BUFFER_DEVICE_ADDRESS);
+ BIND_ENUM_CONSTANT(SUPPORTS_IMAGE_ATOMIC_32_BIT);
BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS);
BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS);
diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h
index 2196622828..960233e3ab 100644
--- a/servers/rendering/rendering_device_commons.h
+++ b/servers/rendering/rendering_device_commons.h
@@ -952,6 +952,7 @@ public:
// If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver.
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS,
SUPPORTS_BUFFER_DEVICE_ADDRESS,
+ SUPPORTS_IMAGE_ATOMIC_32_BIT,
};
enum SubgroupOperations {
diff --git a/thirdparty/README.md b/thirdparty/README.md
index a15f7304cf..8996fc8aa2 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -978,7 +978,7 @@ Its version and license is described in this file under `hidapi`.
## spirv-cross
- Upstream: https://github.com/KhronosGroup/SPIRV-Cross
-- Version: git (6173e24b31f09a0c3217103a130e74c4ddec14a6, 2024)
+- Version: git (d7440cbc6c50332600fdf21c45e6a5df0b07e54c, 2025)
- License: Apache 2.0
Files extracted from upstream source:
diff --git a/thirdparty/spirv-cross/spirv.hpp b/thirdparty/spirv-cross/spirv.hpp
index 5047b9b302..f7a7bf835e 100644
--- a/thirdparty/spirv-cross/spirv.hpp
+++ b/thirdparty/spirv-cross/spirv.hpp
@@ -1,26 +1,10 @@
-// Copyright (c) 2014-2024 The Khronos Group Inc.
+// Copyright: 2014-2024 The Khronos Group Inc.
+// License: MIT
//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and/or associated documentation files (the "Materials"),
-// to deal in the Materials without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense,
-// and/or sell copies of the Materials, and to permit persons to whom the
-// Materials are furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Materials.
-//
-// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS
-// STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND
-// HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/
-//
-// THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS
-// IN THE MATERIALS.
+// MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+// KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+// SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+// https://www.khronos.org/registry/
// This header is automatically generated by the same tool that creates
// the Binary Section of the SPIR-V specification.
@@ -69,6 +53,12 @@ enum SourceLanguage {
SourceLanguageHLSL = 5,
SourceLanguageCPP_for_OpenCL = 6,
SourceLanguageSYCL = 7,
+ SourceLanguageHERO_C = 8,
+ SourceLanguageNZSL = 9,
+ SourceLanguageWGSL = 10,
+ SourceLanguageSlang = 11,
+ SourceLanguageZig = 12,
+ SourceLanguageRust = 13,
SourceLanguageMax = 0x7fffffff,
};
@@ -156,6 +146,9 @@ enum ExecutionMode {
ExecutionModeSubgroupsPerWorkgroupId = 37,
ExecutionModeLocalSizeId = 38,
ExecutionModeLocalSizeHintId = 39,
+ ExecutionModeNonCoherentColorAttachmentReadEXT = 4169,
+ ExecutionModeNonCoherentDepthAttachmentReadEXT = 4170,
+ ExecutionModeNonCoherentStencilAttachmentReadEXT = 4171,
ExecutionModeSubgroupUniformControlFlowKHR = 4421,
ExecutionModePostDepthCoverage = 4446,
ExecutionModeDenormPreserve = 4459,
@@ -163,19 +156,32 @@ enum ExecutionMode {
ExecutionModeSignedZeroInfNanPreserve = 4461,
ExecutionModeRoundingModeRTE = 4462,
ExecutionModeRoundingModeRTZ = 4463,
+ ExecutionModeNonCoherentTileAttachmentReadQCOM = 4489,
+ ExecutionModeTileShadingRateQCOM = 4490,
ExecutionModeEarlyAndLateFragmentTestsAMD = 5017,
ExecutionModeStencilRefReplacingEXT = 5027,
+ ExecutionModeCoalescingAMDX = 5069,
+ ExecutionModeIsApiEntryAMDX = 5070,
+ ExecutionModeMaxNodeRecursionAMDX = 5071,
+ ExecutionModeStaticNumWorkgroupsAMDX = 5072,
+ ExecutionModeShaderIndexAMDX = 5073,
+ ExecutionModeMaxNumWorkgroupsAMDX = 5077,
ExecutionModeStencilRefUnchangedFrontAMD = 5079,
ExecutionModeStencilRefGreaterFrontAMD = 5080,
ExecutionModeStencilRefLessFrontAMD = 5081,
ExecutionModeStencilRefUnchangedBackAMD = 5082,
ExecutionModeStencilRefGreaterBackAMD = 5083,
ExecutionModeStencilRefLessBackAMD = 5084,
+ ExecutionModeQuadDerivativesKHR = 5088,
+ ExecutionModeRequireFullQuadsKHR = 5089,
+ ExecutionModeSharesInputWithAMDX = 5102,
ExecutionModeOutputLinesEXT = 5269,
ExecutionModeOutputLinesNV = 5269,
ExecutionModeOutputPrimitivesEXT = 5270,
ExecutionModeOutputPrimitivesNV = 5270,
+ ExecutionModeDerivativeGroupQuadsKHR = 5289,
ExecutionModeDerivativeGroupQuadsNV = 5289,
+ ExecutionModeDerivativeGroupLinearKHR = 5290,
ExecutionModeDerivativeGroupLinearNV = 5290,
ExecutionModeOutputTrianglesEXT = 5298,
ExecutionModeOutputTrianglesNV = 5298,
@@ -195,7 +201,14 @@ enum ExecutionMode {
ExecutionModeNoGlobalOffsetINTEL = 5895,
ExecutionModeNumSIMDWorkitemsINTEL = 5896,
ExecutionModeSchedulerTargetFmaxMhzINTEL = 5903,
+ ExecutionModeMaximallyReconvergesKHR = 6023,
+ ExecutionModeFPFastMathDefault = 6028,
+ ExecutionModeStreamingInterfaceINTEL = 6154,
+ ExecutionModeRegisterMapInterfaceINTEL = 6160,
ExecutionModeNamedBarrierCountINTEL = 6417,
+ ExecutionModeMaximumRegistersINTEL = 6461,
+ ExecutionModeMaximumRegistersIdINTEL = 6462,
+ ExecutionModeNamedMaximumRegistersINTEL = 6463,
ExecutionModeMax = 0x7fffffff,
};
@@ -213,6 +226,9 @@ enum StorageClass {
StorageClassAtomicCounter = 10,
StorageClassImage = 11,
StorageClassStorageBuffer = 12,
+ StorageClassTileImageEXT = 4172,
+ StorageClassTileAttachmentQCOM = 4491,
+ StorageClassNodePayloadAMDX = 5068,
StorageClassCallableDataKHR = 5328,
StorageClassCallableDataNV = 5328,
StorageClassIncomingCallableDataKHR = 5329,
@@ -227,6 +243,7 @@ enum StorageClass {
StorageClassShaderRecordBufferNV = 5343,
StorageClassPhysicalStorageBuffer = 5349,
StorageClassPhysicalStorageBufferEXT = 5349,
+ StorageClassHitObjectAttributeNV = 5385,
StorageClassTaskPayloadWorkgroupEXT = 5402,
StorageClassCodeSectionINTEL = 5605,
StorageClassDeviceOnlyINTEL = 5936,
@@ -242,6 +259,7 @@ enum Dim {
DimRect = 4,
DimBuffer = 5,
DimSubpassData = 6,
+ DimTileImageDataEXT = 4173,
DimMax = 0x7fffffff,
};
@@ -348,6 +366,15 @@ enum ImageChannelDataType {
ImageChannelDataTypeFloat = 14,
ImageChannelDataTypeUnormInt24 = 15,
ImageChannelDataTypeUnormInt101010_2 = 16,
+ ImageChannelDataTypeUnormInt10X6EXT = 17,
+ ImageChannelDataTypeUnsignedIntRaw10EXT = 19,
+ ImageChannelDataTypeUnsignedIntRaw12EXT = 20,
+ ImageChannelDataTypeUnormInt2_101010EXT = 21,
+ ImageChannelDataTypeUnsignedInt10X6EXT = 22,
+ ImageChannelDataTypeUnsignedInt12X4EXT = 23,
+ ImageChannelDataTypeUnsignedInt14X2EXT = 24,
+ ImageChannelDataTypeUnormInt12X4EXT = 25,
+ ImageChannelDataTypeUnormInt14X2EXT = 26,
ImageChannelDataTypeMax = 0x7fffffff,
};
@@ -405,8 +432,11 @@ enum FPFastMathModeShift {
FPFastMathModeNSZShift = 2,
FPFastMathModeAllowRecipShift = 3,
FPFastMathModeFastShift = 4,
+ FPFastMathModeAllowContractShift = 16,
FPFastMathModeAllowContractFastINTELShift = 16,
+ FPFastMathModeAllowReassocShift = 17,
FPFastMathModeAllowReassocINTELShift = 17,
+ FPFastMathModeAllowTransformShift = 18,
FPFastMathModeMax = 0x7fffffff,
};
@@ -417,8 +447,11 @@ enum FPFastMathModeMask {
FPFastMathModeNSZMask = 0x00000004,
FPFastMathModeAllowRecipMask = 0x00000008,
FPFastMathModeFastMask = 0x00000010,
+ FPFastMathModeAllowContractMask = 0x00010000,
FPFastMathModeAllowContractFastINTELMask = 0x00010000,
+ FPFastMathModeAllowReassocMask = 0x00020000,
FPFastMathModeAllowReassocINTELMask = 0x00020000,
+ FPFastMathModeAllowTransformMask = 0x00040000,
};
enum FPRoundingMode {
@@ -452,6 +485,7 @@ enum FunctionParameterAttribute {
FunctionParameterAttributeNoCapture = 5,
FunctionParameterAttributeNoWrite = 6,
FunctionParameterAttributeNoReadWrite = 7,
+ FunctionParameterAttributeRuntimeAlignedINTEL = 5940,
FunctionParameterAttributeMax = 0x7fffffff,
};
@@ -503,12 +537,21 @@ enum Decoration {
DecorationMaxByteOffset = 45,
DecorationAlignmentId = 46,
DecorationMaxByteOffsetId = 47,
+ DecorationSaturatedToLargestFloat8NormalConversionEXT = 4216,
DecorationNoSignedWrap = 4469,
DecorationNoUnsignedWrap = 4470,
DecorationWeightTextureQCOM = 4487,
DecorationBlockMatchTextureQCOM = 4488,
DecorationBlockMatchSamplerQCOM = 4499,
DecorationExplicitInterpAMD = 4999,
+ DecorationNodeSharesPayloadLimitsWithAMDX = 5019,
+ DecorationNodeMaxPayloadsAMDX = 5020,
+ DecorationTrackFinishWritingAMDX = 5078,
+ DecorationPayloadNodeNameAMDX = 5091,
+ DecorationPayloadNodeBaseIndexAMDX = 5098,
+ DecorationPayloadNodeSparseArrayAMDX = 5099,
+ DecorationPayloadNodeArraySizeAMDX = 5100,
+ DecorationPayloadDispatchIndirectAMDX = 5105,
DecorationOverrideCoverageNV = 5248,
DecorationPassthroughNV = 5250,
DecorationViewportRelativeNV = 5252,
@@ -525,6 +568,7 @@ enum Decoration {
DecorationRestrictPointerEXT = 5355,
DecorationAliasedPointer = 5356,
DecorationAliasedPointerEXT = 5356,
+ DecorationHitObjectShaderRecordBufferNV = 5386,
DecorationBindlessSamplerNV = 5398,
DecorationBindlessImageNV = 5399,
DecorationBoundSamplerNV = 5400,
@@ -557,20 +601,45 @@ enum Decoration {
DecorationMergeINTEL = 5834,
DecorationBankBitsINTEL = 5835,
DecorationForcePow2DepthINTEL = 5836,
+ DecorationStridesizeINTEL = 5883,
+ DecorationWordsizeINTEL = 5884,
+ DecorationTrueDualPortINTEL = 5885,
DecorationBurstCoalesceINTEL = 5899,
DecorationCacheSizeINTEL = 5900,
DecorationDontStaticallyCoalesceINTEL = 5901,
DecorationPrefetchINTEL = 5902,
DecorationStallEnableINTEL = 5905,
DecorationFuseLoopsInFunctionINTEL = 5907,
+ DecorationMathOpDSPModeINTEL = 5909,
DecorationAliasScopeINTEL = 5914,
DecorationNoAliasINTEL = 5915,
+ DecorationInitiationIntervalINTEL = 5917,
+ DecorationMaxConcurrencyINTEL = 5918,
+ DecorationPipelineEnableINTEL = 5919,
DecorationBufferLocationINTEL = 5921,
DecorationIOPipeStorageINTEL = 5944,
DecorationFunctionFloatingPointModeINTEL = 6080,
DecorationSingleElementVectorINTEL = 6085,
DecorationVectorComputeCallableFunctionINTEL = 6087,
DecorationMediaBlockIOINTEL = 6140,
+ DecorationStallFreeINTEL = 6151,
+ DecorationFPMaxErrorDecorationINTEL = 6170,
+ DecorationLatencyControlLabelINTEL = 6172,
+ DecorationLatencyControlConstraintINTEL = 6173,
+ DecorationConduitKernelArgumentINTEL = 6175,
+ DecorationRegisterMapKernelArgumentINTEL = 6176,
+ DecorationMMHostInterfaceAddressWidthINTEL = 6177,
+ DecorationMMHostInterfaceDataWidthINTEL = 6178,
+ DecorationMMHostInterfaceLatencyINTEL = 6179,
+ DecorationMMHostInterfaceReadWriteModeINTEL = 6180,
+ DecorationMMHostInterfaceMaxBurstINTEL = 6181,
+ DecorationMMHostInterfaceWaitRequestINTEL = 6182,
+ DecorationStableKernelArgumentINTEL = 6183,
+ DecorationHostAccessINTEL = 6188,
+ DecorationInitModeINTEL = 6190,
+ DecorationImplementInRegisterMapINTEL = 6191,
+ DecorationCacheControlLoadINTEL = 6442,
+ DecorationCacheControlStoreINTEL = 6443,
DecorationMax = 0x7fffffff,
};
@@ -616,6 +685,11 @@ enum BuiltIn {
BuiltInSubgroupLocalInvocationId = 41,
BuiltInVertexIndex = 42,
BuiltInInstanceIndex = 43,
+ BuiltInCoreIDARM = 4160,
+ BuiltInCoreCountARM = 4161,
+ BuiltInCoreMaxIDARM = 4162,
+ BuiltInWarpIDARM = 4163,
+ BuiltInWarpMaxIDARM = 4164,
BuiltInSubgroupEqMask = 4416,
BuiltInSubgroupEqMaskKHR = 4416,
BuiltInSubgroupGeMask = 4417,
@@ -633,6 +707,9 @@ enum BuiltIn {
BuiltInDeviceIndex = 4438,
BuiltInViewIndex = 4440,
BuiltInShadingRateKHR = 4444,
+ BuiltInTileOffsetQCOM = 4492,
+ BuiltInTileDimensionQCOM = 4493,
+ BuiltInTileApronSizeQCOM = 4494,
BuiltInBaryCoordNoPerspAMD = 4992,
BuiltInBaryCoordNoPerspCentroidAMD = 4993,
BuiltInBaryCoordNoPerspSampleAMD = 4994,
@@ -641,6 +718,8 @@ enum BuiltIn {
BuiltInBaryCoordSmoothSampleAMD = 4997,
BuiltInBaryCoordPullModelAMD = 4998,
BuiltInFragStencilRefEXT = 5014,
+ BuiltInRemainingRecursionLevelsAMDX = 5021,
+ BuiltInShaderIndexAMDX = 5073,
BuiltInViewportMaskNV = 5253,
BuiltInSecondaryPositionNV = 5257,
BuiltInSecondaryViewportMaskNV = 5258,
@@ -693,13 +772,25 @@ enum BuiltIn {
BuiltInHitKindKHR = 5333,
BuiltInHitKindNV = 5333,
BuiltInCurrentRayTimeNV = 5334,
+ BuiltInHitTriangleVertexPositionsKHR = 5335,
+ BuiltInHitMicroTriangleVertexPositionsNV = 5337,
+ BuiltInHitMicroTriangleVertexBarycentricsNV = 5344,
BuiltInIncomingRayFlagsKHR = 5351,
BuiltInIncomingRayFlagsNV = 5351,
BuiltInRayGeometryIndexKHR = 5352,
+ BuiltInHitIsSphereNV = 5359,
+ BuiltInHitIsLSSNV = 5360,
+ BuiltInHitSpherePositionNV = 5361,
BuiltInWarpsPerSMNV = 5374,
BuiltInSMCountNV = 5375,
BuiltInWarpIDNV = 5376,
BuiltInSMIDNV = 5377,
+ BuiltInHitLSSPositionsNV = 5396,
+ BuiltInHitKindFrontFacingMicroTriangleNV = 5405,
+ BuiltInHitKindBackFacingMicroTriangleNV = 5406,
+ BuiltInHitSphereRadiusNV = 5420,
+ BuiltInHitLSSRadiiNV = 5421,
+ BuiltInClusterIDNV = 5436,
BuiltInCullMaskKHR = 6021,
BuiltInMax = 0x7fffffff,
};
@@ -734,6 +825,8 @@ enum LoopControlShift {
LoopControlMaxInterleavingINTELShift = 21,
LoopControlSpeculatedIterationsINTELShift = 22,
LoopControlNoFusionINTELShift = 23,
+ LoopControlLoopCountINTELShift = 24,
+ LoopControlMaxReinvocationDelayINTELShift = 25,
LoopControlMax = 0x7fffffff,
};
@@ -756,6 +849,8 @@ enum LoopControlMask {
LoopControlMaxInterleavingINTELMask = 0x00200000,
LoopControlSpeculatedIterationsINTELMask = 0x00400000,
LoopControlNoFusionINTELMask = 0x00800000,
+ LoopControlLoopCountINTELMask = 0x01000000,
+ LoopControlMaxReinvocationDelayINTELMask = 0x02000000,
};
enum FunctionControlShift {
@@ -763,6 +858,7 @@ enum FunctionControlShift {
FunctionControlDontInlineShift = 1,
FunctionControlPureShift = 2,
FunctionControlConstShift = 3,
+ FunctionControlOptNoneEXTShift = 16,
FunctionControlOptNoneINTELShift = 16,
FunctionControlMax = 0x7fffffff,
};
@@ -773,6 +869,7 @@ enum FunctionControlMask {
FunctionControlDontInlineMask = 0x00000002,
FunctionControlPureMask = 0x00000004,
FunctionControlConstMask = 0x00000008,
+ FunctionControlOptNoneEXTMask = 0x00010000,
FunctionControlOptNoneINTELMask = 0x00010000,
};
@@ -959,6 +1056,17 @@ enum Capability {
CapabilityShaderLayer = 69,
CapabilityShaderViewportIndex = 70,
CapabilityUniformDecoration = 71,
+ CapabilityCoreBuiltinsARM = 4165,
+ CapabilityTileImageColorReadAccessEXT = 4166,
+ CapabilityTileImageDepthReadAccessEXT = 4167,
+ CapabilityTileImageStencilReadAccessEXT = 4168,
+ CapabilityTensorsARM = 4174,
+ CapabilityStorageTensorArrayDynamicIndexingARM = 4175,
+ CapabilityStorageTensorArrayNonUniformIndexingARM = 4176,
+ CapabilityGraphARM = 4191,
+ CapabilityCooperativeMatrixLayoutsARM = 4201,
+ CapabilityFloat8EXT = 4212,
+ CapabilityFloat8CooperativeMatrixEXT = 4213,
CapabilityFragmentShadingRateKHR = 4422,
CapabilitySubgroupBallotKHR = 4423,
CapabilityDrawParameters = 4427,
@@ -988,11 +1096,13 @@ enum Capability {
CapabilityRoundingModeRTZ = 4468,
CapabilityRayQueryProvisionalKHR = 4471,
CapabilityRayQueryKHR = 4472,
+ CapabilityUntypedPointersKHR = 4473,
CapabilityRayTraversalPrimitiveCullingKHR = 4478,
CapabilityRayTracingKHR = 4479,
CapabilityTextureSampleWeightedQCOM = 4484,
CapabilityTextureBoxFilterQCOM = 4485,
CapabilityTextureBlockMatchQCOM = 4486,
+ CapabilityTileShadingQCOM = 4495,
CapabilityTextureBlockMatch2QCOM = 4498,
CapabilityFloat16ImageAMD = 5008,
CapabilityImageGatherBiasLodAMD = 5009,
@@ -1001,6 +1111,13 @@ enum Capability {
CapabilityImageReadWriteLodAMD = 5015,
CapabilityInt64ImageEXT = 5016,
CapabilityShaderClockKHR = 5055,
+ CapabilityShaderEnqueueAMDX = 5067,
+ CapabilityQuadControlKHR = 5087,
+ CapabilityInt4TypeINTEL = 5112,
+ CapabilityInt4CooperativeMatrixINTEL = 5114,
+ CapabilityBFloat16TypeKHR = 5116,
+ CapabilityBFloat16DotProductKHR = 5117,
+ CapabilityBFloat16CooperativeMatrixKHR = 5118,
CapabilitySampleMaskOverrideCoverageNV = 5249,
CapabilityGeometryShaderPassthroughNV = 5251,
CapabilityShaderViewportIndexLayerEXT = 5254,
@@ -1014,6 +1131,7 @@ enum Capability {
CapabilityMeshShadingEXT = 5283,
CapabilityFragmentBarycentricKHR = 5284,
CapabilityFragmentBarycentricNV = 5284,
+ CapabilityComputeDerivativeGroupQuadsKHR = 5288,
CapabilityComputeDerivativeGroupQuadsNV = 5288,
CapabilityFragmentDensityEXT = 5291,
CapabilityShadingRateNV = 5291,
@@ -1042,6 +1160,7 @@ enum Capability {
CapabilityUniformTexelBufferArrayNonUniformIndexingEXT = 5311,
CapabilityStorageTexelBufferArrayNonUniformIndexing = 5312,
CapabilityStorageTexelBufferArrayNonUniformIndexingEXT = 5312,
+ CapabilityRayTracingPositionFetchKHR = 5336,
CapabilityRayTracingNV = 5340,
CapabilityRayTracingMotionBlurNV = 5341,
CapabilityVulkanMemoryModel = 5345,
@@ -1050,6 +1169,7 @@ enum Capability {
CapabilityVulkanMemoryModelDeviceScopeKHR = 5346,
CapabilityPhysicalStorageBufferAddresses = 5347,
CapabilityPhysicalStorageBufferAddressesEXT = 5347,
+ CapabilityComputeDerivativeGroupLinearKHR = 5350,
CapabilityComputeDerivativeGroupLinearNV = 5350,
CapabilityRayTracingProvisionalKHR = 5353,
CapabilityCooperativeMatrixNV = 5357,
@@ -1059,7 +1179,25 @@ enum Capability {
CapabilityFragmentShaderPixelInterlockEXT = 5378,
CapabilityDemoteToHelperInvocation = 5379,
CapabilityDemoteToHelperInvocationEXT = 5379,
+ CapabilityDisplacementMicromapNV = 5380,
+ CapabilityRayTracingOpacityMicromapEXT = 5381,
+ CapabilityShaderInvocationReorderNV = 5383,
CapabilityBindlessTextureNV = 5390,
+ CapabilityRayQueryPositionFetchKHR = 5391,
+ CapabilityCooperativeVectorNV = 5394,
+ CapabilityAtomicFloat16VectorNV = 5404,
+ CapabilityRayTracingDisplacementMicromapNV = 5409,
+ CapabilityRawAccessChainsNV = 5414,
+ CapabilityRayTracingSpheresGeometryNV = 5418,
+ CapabilityRayTracingLinearSweptSpheresGeometryNV = 5419,
+ CapabilityCooperativeMatrixReductionsNV = 5430,
+ CapabilityCooperativeMatrixConversionsNV = 5431,
+ CapabilityCooperativeMatrixPerElementOperationsNV = 5432,
+ CapabilityCooperativeMatrixTensorAddressingNV = 5433,
+ CapabilityCooperativeMatrixBlockLoadsNV = 5434,
+ CapabilityCooperativeVectorTrainingNV = 5435,
+ CapabilityRayTracingClusterAccelerationStructureNV = 5437,
+ CapabilityTensorAddressingNV = 5439,
CapabilitySubgroupShuffleINTEL = 5568,
CapabilitySubgroupBufferBlockIOINTEL = 5569,
CapabilitySubgroupImageBlockIOINTEL = 5570,
@@ -1092,10 +1230,13 @@ enum Capability {
CapabilityFPGAMemoryAccessesINTEL = 5898,
CapabilityFPGAClusterAttributesINTEL = 5904,
CapabilityLoopFuseINTEL = 5906,
+ CapabilityFPGADSPControlINTEL = 5908,
CapabilityMemoryAccessAliasingINTEL = 5910,
+ CapabilityFPGAInvocationPipeliningAttributesINTEL = 5916,
CapabilityFPGABufferLocationINTEL = 5920,
CapabilityArbitraryPrecisionFixedPointINTEL = 5922,
CapabilityUSMStorageClassesINTEL = 5935,
+ CapabilityRuntimeAlignedAttributeINTEL = 5939,
CapabilityIOPipesINTEL = 5943,
CapabilityBlockingPipesINTEL = 5945,
CapabilityFPGARegINTEL = 5948,
@@ -1108,16 +1249,41 @@ enum Capability {
CapabilityDotProduct = 6019,
CapabilityDotProductKHR = 6019,
CapabilityRayCullMaskKHR = 6020,
+ CapabilityCooperativeMatrixKHR = 6022,
+ CapabilityReplicatedCompositesEXT = 6024,
CapabilityBitInstructions = 6025,
CapabilityGroupNonUniformRotateKHR = 6026,
+ CapabilityFloatControls2 = 6029,
CapabilityAtomicFloat32AddEXT = 6033,
CapabilityAtomicFloat64AddEXT = 6034,
- CapabilityLongConstantCompositeINTEL = 6089,
+ CapabilityLongCompositesINTEL = 6089,
+ CapabilityOptNoneEXT = 6094,
CapabilityOptNoneINTEL = 6094,
CapabilityAtomicFloat16AddEXT = 6095,
CapabilityDebugInfoModuleINTEL = 6114,
+ CapabilityBFloat16ConversionINTEL = 6115,
CapabilitySplitBarrierINTEL = 6141,
+ CapabilityArithmeticFenceEXT = 6144,
+ CapabilityFPGAClusterAttributesV2INTEL = 6150,
+ CapabilityFPGAKernelAttributesv2INTEL = 6161,
+ CapabilityTaskSequenceINTEL = 6162,
+ CapabilityFPMaxErrorINTEL = 6169,
+ CapabilityFPGALatencyControlINTEL = 6171,
+ CapabilityFPGAArgumentInterfacesINTEL = 6174,
+ CapabilityGlobalVariableHostAccessINTEL = 6187,
+ CapabilityGlobalVariableFPGADecorationsINTEL = 6189,
+ CapabilitySubgroupBufferPrefetchINTEL = 6220,
+ CapabilitySubgroup2DBlockIOINTEL = 6228,
+ CapabilitySubgroup2DBlockTransformINTEL = 6229,
+ CapabilitySubgroup2DBlockTransposeINTEL = 6230,
+ CapabilitySubgroupMatrixMultiplyAccumulateINTEL = 6236,
+ CapabilityTernaryBitwiseFunctionINTEL = 6241,
CapabilityGroupUniformArithmeticKHR = 6400,
+ CapabilityTensorFloat32RoundingINTEL = 6425,
+ CapabilityMaskedGatherScatterINTEL = 6427,
+ CapabilityCacheControlsINTEL = 6441,
+ CapabilityRegisterLimitsINTEL = 6460,
+ CapabilityBindlessImagesINTEL = 6528,
CapabilityMax = 0x7fffffff,
};
@@ -1130,8 +1296,10 @@ enum RayFlagsShift {
RayFlagsCullFrontFacingTrianglesKHRShift = 5,
RayFlagsCullOpaqueKHRShift = 6,
RayFlagsCullNoOpaqueKHRShift = 7,
+ RayFlagsSkipBuiltinPrimitivesNVShift = 8,
RayFlagsSkipTrianglesKHRShift = 8,
RayFlagsSkipAABBsKHRShift = 9,
+ RayFlagsForceOpacityMicromap2StateEXTShift = 10,
RayFlagsMax = 0x7fffffff,
};
@@ -1145,8 +1313,10 @@ enum RayFlagsMask {
RayFlagsCullFrontFacingTrianglesKHRMask = 0x00000020,
RayFlagsCullOpaqueKHRMask = 0x00000040,
RayFlagsCullNoOpaqueKHRMask = 0x00000080,
+ RayFlagsSkipBuiltinPrimitivesNVMask = 0x00000100,
RayFlagsSkipTrianglesKHRMask = 0x00000100,
RayFlagsSkipAABBsKHRMask = 0x00000200,
+ RayFlagsForceOpacityMicromap2StateEXTMask = 0x00000400,
};
enum RayQueryIntersection {
@@ -1222,6 +1392,210 @@ enum PackedVectorFormat {
PackedVectorFormatMax = 0x7fffffff,
};
+enum CooperativeMatrixOperandsShift {
+ CooperativeMatrixOperandsMatrixASignedComponentsKHRShift = 0,
+ CooperativeMatrixOperandsMatrixBSignedComponentsKHRShift = 1,
+ CooperativeMatrixOperandsMatrixCSignedComponentsKHRShift = 2,
+ CooperativeMatrixOperandsMatrixResultSignedComponentsKHRShift = 3,
+ CooperativeMatrixOperandsSaturatingAccumulationKHRShift = 4,
+ CooperativeMatrixOperandsMax = 0x7fffffff,
+};
+
+enum CooperativeMatrixOperandsMask {
+ CooperativeMatrixOperandsMaskNone = 0,
+ CooperativeMatrixOperandsMatrixASignedComponentsKHRMask = 0x00000001,
+ CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask = 0x00000002,
+ CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask = 0x00000004,
+ CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask = 0x00000008,
+ CooperativeMatrixOperandsSaturatingAccumulationKHRMask = 0x00000010,
+};
+
+enum CooperativeMatrixLayout {
+ CooperativeMatrixLayoutRowMajorKHR = 0,
+ CooperativeMatrixLayoutColumnMajorKHR = 1,
+ CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202,
+ CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203,
+ CooperativeMatrixLayoutMax = 0x7fffffff,
+};
+
+enum CooperativeMatrixUse {
+ CooperativeMatrixUseMatrixAKHR = 0,
+ CooperativeMatrixUseMatrixBKHR = 1,
+ CooperativeMatrixUseMatrixAccumulatorKHR = 2,
+ CooperativeMatrixUseMax = 0x7fffffff,
+};
+
+enum CooperativeMatrixReduceShift {
+ CooperativeMatrixReduceRowShift = 0,
+ CooperativeMatrixReduceColumnShift = 1,
+ CooperativeMatrixReduce2x2Shift = 2,
+ CooperativeMatrixReduceMax = 0x7fffffff,
+};
+
+enum CooperativeMatrixReduceMask {
+ CooperativeMatrixReduceMaskNone = 0,
+ CooperativeMatrixReduceRowMask = 0x00000001,
+ CooperativeMatrixReduceColumnMask = 0x00000002,
+ CooperativeMatrixReduce2x2Mask = 0x00000004,
+};
+
+enum TensorClampMode {
+ TensorClampModeUndefined = 0,
+ TensorClampModeConstant = 1,
+ TensorClampModeClampToEdge = 2,
+ TensorClampModeRepeat = 3,
+ TensorClampModeRepeatMirrored = 4,
+ TensorClampModeMax = 0x7fffffff,
+};
+
+enum TensorAddressingOperandsShift {
+ TensorAddressingOperandsTensorViewShift = 0,
+ TensorAddressingOperandsDecodeFuncShift = 1,
+ TensorAddressingOperandsMax = 0x7fffffff,
+};
+
+enum TensorAddressingOperandsMask {
+ TensorAddressingOperandsMaskNone = 0,
+ TensorAddressingOperandsTensorViewMask = 0x00000001,
+ TensorAddressingOperandsDecodeFuncMask = 0x00000002,
+};
+
+enum TensorOperandsShift {
+ TensorOperandsNontemporalARMShift = 0,
+ TensorOperandsOutOfBoundsValueARMShift = 1,
+ TensorOperandsMakeElementAvailableARMShift = 2,
+ TensorOperandsMakeElementVisibleARMShift = 3,
+ TensorOperandsNonPrivateElementARMShift = 4,
+ TensorOperandsMax = 0x7fffffff,
+};
+
+enum TensorOperandsMask {
+ TensorOperandsMaskNone = 0,
+ TensorOperandsNontemporalARMMask = 0x00000001,
+ TensorOperandsOutOfBoundsValueARMMask = 0x00000002,
+ TensorOperandsMakeElementAvailableARMMask = 0x00000004,
+ TensorOperandsMakeElementVisibleARMMask = 0x00000008,
+ TensorOperandsNonPrivateElementARMMask = 0x00000010,
+};
+
+enum InitializationModeQualifier {
+ InitializationModeQualifierInitOnDeviceReprogramINTEL = 0,
+ InitializationModeQualifierInitOnDeviceResetINTEL = 1,
+ InitializationModeQualifierMax = 0x7fffffff,
+};
+
+enum HostAccessQualifier {
+ HostAccessQualifierNoneINTEL = 0,
+ HostAccessQualifierReadINTEL = 1,
+ HostAccessQualifierWriteINTEL = 2,
+ HostAccessQualifierReadWriteINTEL = 3,
+ HostAccessQualifierMax = 0x7fffffff,
+};
+
+enum LoadCacheControl {
+ LoadCacheControlUncachedINTEL = 0,
+ LoadCacheControlCachedINTEL = 1,
+ LoadCacheControlStreamingINTEL = 2,
+ LoadCacheControlInvalidateAfterReadINTEL = 3,
+ LoadCacheControlConstCachedINTEL = 4,
+ LoadCacheControlMax = 0x7fffffff,
+};
+
+enum StoreCacheControl {
+ StoreCacheControlUncachedINTEL = 0,
+ StoreCacheControlWriteThroughINTEL = 1,
+ StoreCacheControlWriteBackINTEL = 2,
+ StoreCacheControlStreamingINTEL = 3,
+ StoreCacheControlMax = 0x7fffffff,
+};
+
+enum NamedMaximumNumberOfRegisters {
+ NamedMaximumNumberOfRegistersAutoINTEL = 0,
+ NamedMaximumNumberOfRegistersMax = 0x7fffffff,
+};
+
+enum MatrixMultiplyAccumulateOperandsShift {
+ MatrixMultiplyAccumulateOperandsMatrixASignedComponentsINTELShift = 0,
+ MatrixMultiplyAccumulateOperandsMatrixBSignedComponentsINTELShift = 1,
+ MatrixMultiplyAccumulateOperandsMatrixCBFloat16INTELShift = 2,
+ MatrixMultiplyAccumulateOperandsMatrixResultBFloat16INTELShift = 3,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedInt8INTELShift = 4,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedInt8INTELShift = 5,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedInt4INTELShift = 6,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedInt4INTELShift = 7,
+ MatrixMultiplyAccumulateOperandsMatrixATF32INTELShift = 8,
+ MatrixMultiplyAccumulateOperandsMatrixBTF32INTELShift = 9,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedFloat16INTELShift = 10,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedFloat16INTELShift = 11,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedBFloat16INTELShift = 12,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedBFloat16INTELShift = 13,
+ MatrixMultiplyAccumulateOperandsMax = 0x7fffffff,
+};
+
+enum MatrixMultiplyAccumulateOperandsMask {
+ MatrixMultiplyAccumulateOperandsMaskNone = 0,
+ MatrixMultiplyAccumulateOperandsMatrixASignedComponentsINTELMask = 0x00000001,
+ MatrixMultiplyAccumulateOperandsMatrixBSignedComponentsINTELMask = 0x00000002,
+ MatrixMultiplyAccumulateOperandsMatrixCBFloat16INTELMask = 0x00000004,
+ MatrixMultiplyAccumulateOperandsMatrixResultBFloat16INTELMask = 0x00000008,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedInt8INTELMask = 0x00000010,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedInt8INTELMask = 0x00000020,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedInt4INTELMask = 0x00000040,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedInt4INTELMask = 0x00000080,
+ MatrixMultiplyAccumulateOperandsMatrixATF32INTELMask = 0x00000100,
+ MatrixMultiplyAccumulateOperandsMatrixBTF32INTELMask = 0x00000200,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedFloat16INTELMask = 0x00000400,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedFloat16INTELMask = 0x00000800,
+ MatrixMultiplyAccumulateOperandsMatrixAPackedBFloat16INTELMask = 0x00001000,
+ MatrixMultiplyAccumulateOperandsMatrixBPackedBFloat16INTELMask = 0x00002000,
+};
+
+enum RawAccessChainOperandsShift {
+ RawAccessChainOperandsRobustnessPerComponentNVShift = 0,
+ RawAccessChainOperandsRobustnessPerElementNVShift = 1,
+ RawAccessChainOperandsMax = 0x7fffffff,
+};
+
+enum RawAccessChainOperandsMask {
+ RawAccessChainOperandsMaskNone = 0,
+ RawAccessChainOperandsRobustnessPerComponentNVMask = 0x00000001,
+ RawAccessChainOperandsRobustnessPerElementNVMask = 0x00000002,
+};
+
+enum FPEncoding {
+ FPEncodingBFloat16KHR = 0,
+ FPEncodingFloat8E4M3EXT = 4214,
+ FPEncodingFloat8E5M2EXT = 4215,
+ FPEncodingMax = 0x7fffffff,
+};
+
+enum CooperativeVectorMatrixLayout {
+ CooperativeVectorMatrixLayoutRowMajorNV = 0,
+ CooperativeVectorMatrixLayoutColumnMajorNV = 1,
+ CooperativeVectorMatrixLayoutInferencingOptimalNV = 2,
+ CooperativeVectorMatrixLayoutTrainingOptimalNV = 3,
+ CooperativeVectorMatrixLayoutMax = 0x7fffffff,
+};
+
+enum ComponentType {
+ ComponentTypeFloat16NV = 0,
+ ComponentTypeFloat32NV = 1,
+ ComponentTypeFloat64NV = 2,
+ ComponentTypeSignedInt8NV = 3,
+ ComponentTypeSignedInt16NV = 4,
+ ComponentTypeSignedInt32NV = 5,
+ ComponentTypeSignedInt64NV = 6,
+ ComponentTypeUnsignedInt8NV = 7,
+ ComponentTypeUnsignedInt16NV = 8,
+ ComponentTypeUnsignedInt32NV = 9,
+ ComponentTypeUnsignedInt64NV = 10,
+ ComponentTypeSignedInt8PackedNV = 1000491000,
+ ComponentTypeUnsignedInt8PackedNV = 1000491001,
+ ComponentTypeFloatE4M3NV = 1000491002,
+ ComponentTypeFloatE5M2NV = 1000491003,
+ ComponentTypeMax = 0x7fffffff,
+};
+
enum Op {
OpNop = 0,
OpUndef = 1,
@@ -1567,14 +1941,37 @@ enum Op {
OpPtrEqual = 401,
OpPtrNotEqual = 402,
OpPtrDiff = 403,
+ OpColorAttachmentReadEXT = 4160,
+ OpDepthAttachmentReadEXT = 4161,
+ OpStencilAttachmentReadEXT = 4162,
+ OpTypeTensorARM = 4163,
+ OpTensorReadARM = 4164,
+ OpTensorWriteARM = 4165,
+ OpTensorQuerySizeARM = 4166,
+ OpGraphConstantARM = 4181,
+ OpGraphEntryPointARM = 4182,
+ OpGraphARM = 4183,
+ OpGraphInputARM = 4184,
+ OpGraphSetOutputARM = 4185,
+ OpGraphEndARM = 4186,
+ OpTypeGraphARM = 4190,
OpTerminateInvocation = 4416,
+ OpTypeUntypedPointerKHR = 4417,
+ OpUntypedVariableKHR = 4418,
+ OpUntypedAccessChainKHR = 4419,
+ OpUntypedInBoundsAccessChainKHR = 4420,
OpSubgroupBallotKHR = 4421,
OpSubgroupFirstInvocationKHR = 4422,
+ OpUntypedPtrAccessChainKHR = 4423,
+ OpUntypedInBoundsPtrAccessChainKHR = 4424,
+ OpUntypedArrayLengthKHR = 4425,
+ OpUntypedPrefetchKHR = 4426,
OpSubgroupAllKHR = 4428,
OpSubgroupAnyKHR = 4429,
OpSubgroupAllEqualKHR = 4430,
OpGroupNonUniformRotateKHR = 4431,
OpSubgroupReadInvocationKHR = 4432,
+ OpExtInstWithForwardRefsKHR = 4433,
OpTraceRayKHR = 4445,
OpExecuteCallableKHR = 4446,
OpConvertUToAccelerationStructureKHR = 4447,
@@ -1592,6 +1989,14 @@ enum Op {
OpUDotAccSatKHR = 4454,
OpSUDotAccSat = 4455,
OpSUDotAccSatKHR = 4455,
+ OpTypeCooperativeMatrixKHR = 4456,
+ OpCooperativeMatrixLoadKHR = 4457,
+ OpCooperativeMatrixStoreKHR = 4458,
+ OpCooperativeMatrixMulAddKHR = 4459,
+ OpCooperativeMatrixLengthKHR = 4460,
+ OpConstantCompositeReplicateEXT = 4461,
+ OpSpecConstantCompositeReplicateEXT = 4462,
+ OpCompositeConstructReplicateEXT = 4463,
OpTypeRayQueryKHR = 4472,
OpRayQueryInitializeKHR = 4473,
OpRayQueryTerminateKHR = 4474,
@@ -1618,11 +2023,64 @@ enum Op {
OpFragmentMaskFetchAMD = 5011,
OpFragmentFetchAMD = 5012,
OpReadClockKHR = 5056,
+ OpAllocateNodePayloadsAMDX = 5074,
+ OpEnqueueNodePayloadsAMDX = 5075,
+ OpTypeNodePayloadArrayAMDX = 5076,
+ OpFinishWritingNodePayloadAMDX = 5078,
+ OpNodePayloadArrayLengthAMDX = 5090,
+ OpIsNodePayloadValidAMDX = 5101,
+ OpConstantStringAMDX = 5103,
+ OpSpecConstantStringAMDX = 5104,
+ OpGroupNonUniformQuadAllKHR = 5110,
+ OpGroupNonUniformQuadAnyKHR = 5111,
+ OpHitObjectRecordHitMotionNV = 5249,
+ OpHitObjectRecordHitWithIndexMotionNV = 5250,
+ OpHitObjectRecordMissMotionNV = 5251,
+ OpHitObjectGetWorldToObjectNV = 5252,
+ OpHitObjectGetObjectToWorldNV = 5253,
+ OpHitObjectGetObjectRayDirectionNV = 5254,
+ OpHitObjectGetObjectRayOriginNV = 5255,
+ OpHitObjectTraceRayMotionNV = 5256,
+ OpHitObjectGetShaderRecordBufferHandleNV = 5257,
+ OpHitObjectGetShaderBindingTableRecordIndexNV = 5258,
+ OpHitObjectRecordEmptyNV = 5259,
+ OpHitObjectTraceRayNV = 5260,
+ OpHitObjectRecordHitNV = 5261,
+ OpHitObjectRecordHitWithIndexNV = 5262,
+ OpHitObjectRecordMissNV = 5263,
+ OpHitObjectExecuteShaderNV = 5264,
+ OpHitObjectGetCurrentTimeNV = 5265,
+ OpHitObjectGetAttributesNV = 5266,
+ OpHitObjectGetHitKindNV = 5267,
+ OpHitObjectGetPrimitiveIndexNV = 5268,
+ OpHitObjectGetGeometryIndexNV = 5269,
+ OpHitObjectGetInstanceIdNV = 5270,
+ OpHitObjectGetInstanceCustomIndexNV = 5271,
+ OpHitObjectGetWorldRayDirectionNV = 5272,
+ OpHitObjectGetWorldRayOriginNV = 5273,
+ OpHitObjectGetRayTMaxNV = 5274,
+ OpHitObjectGetRayTMinNV = 5275,
+ OpHitObjectIsEmptyNV = 5276,
+ OpHitObjectIsHitNV = 5277,
+ OpHitObjectIsMissNV = 5278,
+ OpReorderThreadWithHitObjectNV = 5279,
+ OpReorderThreadWithHintNV = 5280,
+ OpTypeHitObjectNV = 5281,
OpImageSampleFootprintNV = 5283,
+ OpTypeCooperativeVectorNV = 5288,
+ OpCooperativeVectorMatrixMulNV = 5289,
+ OpCooperativeVectorOuterProductAccumulateNV = 5290,
+ OpCooperativeVectorReduceSumAccumulateNV = 5291,
+ OpCooperativeVectorMatrixMulAddNV = 5292,
+ OpCooperativeMatrixConvertNV = 5293,
OpEmitMeshTasksEXT = 5294,
OpSetMeshOutputsEXT = 5295,
OpGroupNonUniformPartitionNV = 5296,
OpWritePackedPrimitiveIndices4x8NV = 5299,
+ OpFetchMicroTriangleVertexPositionNV = 5300,
+ OpFetchMicroTriangleVertexBarycentricNV = 5301,
+ OpCooperativeVectorLoadNV = 5302,
+ OpCooperativeVectorStoreNV = 5303,
OpReportIntersectionKHR = 5334,
OpReportIntersectionNV = 5334,
OpIgnoreIntersectionNV = 5335,
@@ -1630,9 +2088,12 @@ enum Op {
OpTraceNV = 5337,
OpTraceMotionNV = 5338,
OpTraceRayMotionNV = 5339,
+ OpRayQueryGetIntersectionTriangleVertexPositionsKHR = 5340,
OpTypeAccelerationStructureKHR = 5341,
OpTypeAccelerationStructureNV = 5341,
OpExecuteCallableNV = 5344,
+ OpRayQueryGetClusterIdNV = 5345,
+ OpHitObjectGetClusterIdNV = 5346,
OpTypeCooperativeMatrixNV = 5358,
OpCooperativeMatrixLoadNV = 5359,
OpCooperativeMatrixStoreNV = 5360,
@@ -1640,9 +2101,26 @@ enum Op {
OpCooperativeMatrixLengthNV = 5362,
OpBeginInvocationInterlockEXT = 5364,
OpEndInvocationInterlockEXT = 5365,
+ OpCooperativeMatrixReduceNV = 5366,
+ OpCooperativeMatrixLoadTensorNV = 5367,
+ OpCooperativeMatrixStoreTensorNV = 5368,
+ OpCooperativeMatrixPerElementOpNV = 5369,
+ OpTypeTensorLayoutNV = 5370,
+ OpTypeTensorViewNV = 5371,
+ OpCreateTensorLayoutNV = 5372,
+ OpTensorLayoutSetDimensionNV = 5373,
+ OpTensorLayoutSetStrideNV = 5374,
+ OpTensorLayoutSliceNV = 5375,
+ OpTensorLayoutSetClampValueNV = 5376,
+ OpCreateTensorViewNV = 5377,
+ OpTensorViewSetDimensionNV = 5378,
+ OpTensorViewSetStrideNV = 5379,
OpDemoteToHelperInvocation = 5380,
OpDemoteToHelperInvocationEXT = 5380,
OpIsHelperInvocationEXT = 5381,
+ OpTensorViewSetClipNV = 5382,
+ OpTensorLayoutSetBlockSizeNV = 5384,
+ OpCooperativeMatrixTransposeNV = 5390,
OpConvertUToImageNV = 5391,
OpConvertUToSamplerNV = 5392,
OpConvertImageToUNV = 5393,
@@ -1650,6 +2128,20 @@ enum Op {
OpConvertUToSampledImageNV = 5395,
OpConvertSampledImageToUNV = 5396,
OpSamplerImageAddressingModeNV = 5397,
+ OpRawAccessChainNV = 5398,
+ OpRayQueryGetIntersectionSpherePositionNV = 5427,
+ OpRayQueryGetIntersectionSphereRadiusNV = 5428,
+ OpRayQueryGetIntersectionLSSPositionsNV = 5429,
+ OpRayQueryGetIntersectionLSSRadiiNV = 5430,
+ OpRayQueryGetIntersectionLSSHitValueNV = 5431,
+ OpHitObjectGetSpherePositionNV = 5432,
+ OpHitObjectGetSphereRadiusNV = 5433,
+ OpHitObjectGetLSSPositionsNV = 5434,
+ OpHitObjectGetLSSRadiiNV = 5435,
+ OpHitObjectIsSphereHitNV = 5436,
+ OpHitObjectIsLSSHitNV = 5437,
+ OpRayQueryIsSphereHitNV = 5438,
+ OpRayQueryIsLSSHitNV = 5439,
OpSubgroupShuffleINTEL = 5571,
OpSubgroupShuffleDownINTEL = 5572,
OpSubgroupShuffleUpINTEL = 5573,
@@ -1891,8 +2383,25 @@ enum Op {
OpTypeStructContinuedINTEL = 6090,
OpConstantCompositeContinuedINTEL = 6091,
OpSpecConstantCompositeContinuedINTEL = 6092,
+ OpCompositeConstructContinuedINTEL = 6096,
+ OpConvertFToBF16INTEL = 6116,
+ OpConvertBF16ToFINTEL = 6117,
OpControlBarrierArriveINTEL = 6142,
OpControlBarrierWaitINTEL = 6143,
+ OpArithmeticFenceEXT = 6145,
+ OpTaskSequenceCreateINTEL = 6163,
+ OpTaskSequenceAsyncINTEL = 6164,
+ OpTaskSequenceGetINTEL = 6165,
+ OpTaskSequenceReleaseINTEL = 6166,
+ OpTypeTaskSequenceINTEL = 6199,
+ OpSubgroupBlockPrefetchINTEL = 6221,
+ OpSubgroup2DBlockLoadINTEL = 6231,
+ OpSubgroup2DBlockLoadTransformINTEL = 6232,
+ OpSubgroup2DBlockLoadTransposeINTEL = 6233,
+ OpSubgroup2DBlockPrefetchINTEL = 6234,
+ OpSubgroup2DBlockStoreINTEL = 6235,
+ OpSubgroupMatrixMultiplyAccumulateINTEL = 6237,
+ OpBitwiseFunctionINTEL = 6242,
OpGroupIMulKHR = 6401,
OpGroupFMulKHR = 6402,
OpGroupBitwiseAndKHR = 6403,
@@ -1901,6 +2410,12 @@ enum Op {
OpGroupLogicalAndKHR = 6406,
OpGroupLogicalOrKHR = 6407,
OpGroupLogicalXorKHR = 6408,
+ OpRoundFToTF32INTEL = 6426,
+ OpMaskedGatherINTEL = 6428,
+ OpMaskedScatterINTEL = 6429,
+ OpConvertHandleToImageINTEL = 6529,
+ OpConvertHandleToSamplerINTEL = 6530,
+ OpConvertHandleToSampledImageINTEL = 6531,
OpMax = 0x7fffffff,
};
@@ -2256,14 +2771,37 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpPtrEqual: *hasResult = true; *hasResultType = true; break;
case OpPtrNotEqual: *hasResult = true; *hasResultType = true; break;
case OpPtrDiff: *hasResult = true; *hasResultType = true; break;
+ case OpColorAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+ case OpDepthAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+ case OpStencilAttachmentReadEXT: *hasResult = true; *hasResultType = true; break;
+ case OpTypeTensorARM: *hasResult = true; *hasResultType = false; break;
+ case OpTensorReadARM: *hasResult = true; *hasResultType = true; break;
+ case OpTensorWriteARM: *hasResult = false; *hasResultType = false; break;
+ case OpTensorQuerySizeARM: *hasResult = true; *hasResultType = true; break;
+ case OpGraphConstantARM: *hasResult = true; *hasResultType = true; break;
+ case OpGraphEntryPointARM: *hasResult = false; *hasResultType = false; break;
+ case OpGraphARM: *hasResult = true; *hasResultType = true; break;
+ case OpGraphInputARM: *hasResult = true; *hasResultType = true; break;
+ case OpGraphSetOutputARM: *hasResult = false; *hasResultType = false; break;
+ case OpGraphEndARM: *hasResult = false; *hasResultType = false; break;
+ case OpTypeGraphARM: *hasResult = true; *hasResultType = false; break;
case OpTerminateInvocation: *hasResult = false; *hasResultType = false; break;
+ case OpTypeUntypedPointerKHR: *hasResult = true; *hasResultType = false; break;
+ case OpUntypedVariableKHR: *hasResult = true; *hasResultType = true; break;
+ case OpUntypedAccessChainKHR: *hasResult = true; *hasResultType = true; break;
+ case OpUntypedInBoundsAccessChainKHR: *hasResult = true; *hasResultType = true; break;
case OpSubgroupBallotKHR: *hasResult = true; *hasResultType = true; break;
case OpSubgroupFirstInvocationKHR: *hasResult = true; *hasResultType = true; break;
+ case OpUntypedPtrAccessChainKHR: *hasResult = true; *hasResultType = true; break;
+ case OpUntypedInBoundsPtrAccessChainKHR: *hasResult = true; *hasResultType = true; break;
+ case OpUntypedArrayLengthKHR: *hasResult = true; *hasResultType = true; break;
+ case OpUntypedPrefetchKHR: *hasResult = false; *hasResultType = false; break;
case OpSubgroupAllKHR: *hasResult = true; *hasResultType = true; break;
case OpSubgroupAnyKHR: *hasResult = true; *hasResultType = true; break;
case OpSubgroupAllEqualKHR: *hasResult = true; *hasResultType = true; break;
case OpGroupNonUniformRotateKHR: *hasResult = true; *hasResultType = true; break;
case OpSubgroupReadInvocationKHR: *hasResult = true; *hasResultType = true; break;
+ case OpExtInstWithForwardRefsKHR: *hasResult = true; *hasResultType = true; break;
case OpTraceRayKHR: *hasResult = false; *hasResultType = false; break;
case OpExecuteCallableKHR: *hasResult = false; *hasResultType = false; break;
case OpConvertUToAccelerationStructureKHR: *hasResult = true; *hasResultType = true; break;
@@ -2275,6 +2813,14 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpSDotAccSat: *hasResult = true; *hasResultType = true; break;
case OpUDotAccSat: *hasResult = true; *hasResultType = true; break;
case OpSUDotAccSat: *hasResult = true; *hasResultType = true; break;
+ case OpTypeCooperativeMatrixKHR: *hasResult = true; *hasResultType = false; break;
+ case OpCooperativeMatrixLoadKHR: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixStoreKHR: *hasResult = false; *hasResultType = false; break;
+ case OpCooperativeMatrixMulAddKHR: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixLengthKHR: *hasResult = true; *hasResultType = true; break;
+ case OpConstantCompositeReplicateEXT: *hasResult = true; *hasResultType = true; break;
+ case OpSpecConstantCompositeReplicateEXT: *hasResult = true; *hasResultType = true; break;
+ case OpCompositeConstructReplicateEXT: *hasResult = true; *hasResultType = true; break;
case OpTypeRayQueryKHR: *hasResult = true; *hasResultType = false; break;
case OpRayQueryInitializeKHR: *hasResult = false; *hasResultType = false; break;
case OpRayQueryTerminateKHR: *hasResult = false; *hasResultType = false; break;
@@ -2301,19 +2847,75 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpFragmentMaskFetchAMD: *hasResult = true; *hasResultType = true; break;
case OpFragmentFetchAMD: *hasResult = true; *hasResultType = true; break;
case OpReadClockKHR: *hasResult = true; *hasResultType = true; break;
+ case OpAllocateNodePayloadsAMDX: *hasResult = true; *hasResultType = true; break;
+ case OpEnqueueNodePayloadsAMDX: *hasResult = false; *hasResultType = false; break;
+ case OpTypeNodePayloadArrayAMDX: *hasResult = true; *hasResultType = false; break;
+ case OpFinishWritingNodePayloadAMDX: *hasResult = true; *hasResultType = true; break;
+ case OpNodePayloadArrayLengthAMDX: *hasResult = true; *hasResultType = true; break;
+ case OpIsNodePayloadValidAMDX: *hasResult = true; *hasResultType = true; break;
+ case OpConstantStringAMDX: *hasResult = true; *hasResultType = false; break;
+ case OpSpecConstantStringAMDX: *hasResult = true; *hasResultType = false; break;
+ case OpGroupNonUniformQuadAllKHR: *hasResult = true; *hasResultType = true; break;
+ case OpGroupNonUniformQuadAnyKHR: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectRecordHitMotionNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectRecordHitWithIndexMotionNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectRecordMissMotionNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectGetWorldToObjectNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetObjectToWorldNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetObjectRayDirectionNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetObjectRayOriginNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectTraceRayMotionNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectGetShaderRecordBufferHandleNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetShaderBindingTableRecordIndexNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectRecordEmptyNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectTraceRayNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectRecordHitNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectRecordHitWithIndexNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectRecordMissNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectExecuteShaderNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectGetCurrentTimeNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetAttributesNV: *hasResult = false; *hasResultType = false; break;
+ case OpHitObjectGetHitKindNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetPrimitiveIndexNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetGeometryIndexNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetInstanceIdNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetInstanceCustomIndexNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetWorldRayDirectionNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetWorldRayOriginNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetRayTMaxNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetRayTMinNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectIsEmptyNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectIsHitNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectIsMissNV: *hasResult = true; *hasResultType = true; break;
+ case OpReorderThreadWithHitObjectNV: *hasResult = false; *hasResultType = false; break;
+ case OpReorderThreadWithHintNV: *hasResult = false; *hasResultType = false; break;
+ case OpTypeHitObjectNV: *hasResult = true; *hasResultType = false; break;
case OpImageSampleFootprintNV: *hasResult = true; *hasResultType = true; break;
+ case OpTypeCooperativeVectorNV: *hasResult = true; *hasResultType = false; break;
+ case OpCooperativeVectorMatrixMulNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeVectorOuterProductAccumulateNV: *hasResult = false; *hasResultType = false; break;
+ case OpCooperativeVectorReduceSumAccumulateNV: *hasResult = false; *hasResultType = false; break;
+ case OpCooperativeVectorMatrixMulAddNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixConvertNV: *hasResult = true; *hasResultType = true; break;
case OpEmitMeshTasksEXT: *hasResult = false; *hasResultType = false; break;
case OpSetMeshOutputsEXT: *hasResult = false; *hasResultType = false; break;
case OpGroupNonUniformPartitionNV: *hasResult = true; *hasResultType = true; break;
case OpWritePackedPrimitiveIndices4x8NV: *hasResult = false; *hasResultType = false; break;
- case OpReportIntersectionNV: *hasResult = true; *hasResultType = true; break;
+ case OpFetchMicroTriangleVertexPositionNV: *hasResult = true; *hasResultType = true; break;
+ case OpFetchMicroTriangleVertexBarycentricNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeVectorLoadNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeVectorStoreNV: *hasResult = false; *hasResultType = false; break;
+ case OpReportIntersectionKHR: *hasResult = true; *hasResultType = true; break;
case OpIgnoreIntersectionNV: *hasResult = false; *hasResultType = false; break;
case OpTerminateRayNV: *hasResult = false; *hasResultType = false; break;
case OpTraceNV: *hasResult = false; *hasResultType = false; break;
case OpTraceMotionNV: *hasResult = false; *hasResultType = false; break;
case OpTraceRayMotionNV: *hasResult = false; *hasResultType = false; break;
- case OpTypeAccelerationStructureNV: *hasResult = true; *hasResultType = false; break;
+ case OpRayQueryGetIntersectionTriangleVertexPositionsKHR: *hasResult = true; *hasResultType = true; break;
+ case OpTypeAccelerationStructureKHR: *hasResult = true; *hasResultType = false; break;
case OpExecuteCallableNV: *hasResult = false; *hasResultType = false; break;
+ case OpRayQueryGetClusterIdNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetClusterIdNV: *hasResult = true; *hasResultType = true; break;
case OpTypeCooperativeMatrixNV: *hasResult = true; *hasResultType = false; break;
case OpCooperativeMatrixLoadNV: *hasResult = true; *hasResultType = true; break;
case OpCooperativeMatrixStoreNV: *hasResult = false; *hasResultType = false; break;
@@ -2321,8 +2923,25 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpCooperativeMatrixLengthNV: *hasResult = true; *hasResultType = true; break;
case OpBeginInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
case OpEndInvocationInterlockEXT: *hasResult = false; *hasResultType = false; break;
+ case OpCooperativeMatrixReduceNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixLoadTensorNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixStoreTensorNV: *hasResult = false; *hasResultType = false; break;
+ case OpCooperativeMatrixPerElementOpNV: *hasResult = true; *hasResultType = true; break;
+ case OpTypeTensorLayoutNV: *hasResult = true; *hasResultType = false; break;
+ case OpTypeTensorViewNV: *hasResult = true; *hasResultType = false; break;
+ case OpCreateTensorLayoutNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorLayoutSetDimensionNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorLayoutSetStrideNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorLayoutSliceNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorLayoutSetClampValueNV: *hasResult = true; *hasResultType = true; break;
+ case OpCreateTensorViewNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorViewSetDimensionNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorViewSetStrideNV: *hasResult = true; *hasResultType = true; break;
case OpDemoteToHelperInvocation: *hasResult = false; *hasResultType = false; break;
case OpIsHelperInvocationEXT: *hasResult = true; *hasResultType = true; break;
+ case OpTensorViewSetClipNV: *hasResult = true; *hasResultType = true; break;
+ case OpTensorLayoutSetBlockSizeNV: *hasResult = true; *hasResultType = true; break;
+ case OpCooperativeMatrixTransposeNV: *hasResult = true; *hasResultType = true; break;
case OpConvertUToImageNV: *hasResult = true; *hasResultType = true; break;
case OpConvertUToSamplerNV: *hasResult = true; *hasResultType = true; break;
case OpConvertImageToUNV: *hasResult = true; *hasResultType = true; break;
@@ -2330,6 +2949,20 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpConvertUToSampledImageNV: *hasResult = true; *hasResultType = true; break;
case OpConvertSampledImageToUNV: *hasResult = true; *hasResultType = true; break;
case OpSamplerImageAddressingModeNV: *hasResult = false; *hasResultType = false; break;
+ case OpRawAccessChainNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryGetIntersectionSpherePositionNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryGetIntersectionSphereRadiusNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryGetIntersectionLSSPositionsNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryGetIntersectionLSSRadiiNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryGetIntersectionLSSHitValueNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetSpherePositionNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetSphereRadiusNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetLSSPositionsNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectGetLSSRadiiNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectIsSphereHitNV: *hasResult = true; *hasResultType = true; break;
+ case OpHitObjectIsLSSHitNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryIsSphereHitNV: *hasResult = true; *hasResultType = true; break;
+ case OpRayQueryIsLSSHitNV: *hasResult = true; *hasResultType = true; break;
case OpSubgroupShuffleINTEL: *hasResult = true; *hasResultType = true; break;
case OpSubgroupShuffleDownINTEL: *hasResult = true; *hasResultType = true; break;
case OpSubgroupShuffleUpINTEL: *hasResult = true; *hasResultType = true; break;
@@ -2356,7 +2989,7 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpUMul32x16INTEL: *hasResult = true; *hasResultType = true; break;
case OpConstantFunctionPointerINTEL: *hasResult = true; *hasResultType = true; break;
case OpFunctionPointerCallINTEL: *hasResult = true; *hasResultType = true; break;
- case OpAsmTargetINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpAsmTargetINTEL: *hasResult = true; *hasResultType = false; break;
case OpAsmINTEL: *hasResult = true; *hasResultType = true; break;
case OpAsmCallINTEL: *hasResult = true; *hasResultType = true; break;
case OpAtomicFMinEXT: *hasResult = true; *hasResultType = true; break;
@@ -2569,8 +3202,25 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpTypeStructContinuedINTEL: *hasResult = false; *hasResultType = false; break;
case OpConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break;
case OpSpecConstantCompositeContinuedINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpCompositeConstructContinuedINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpConvertFToBF16INTEL: *hasResult = true; *hasResultType = true; break;
+ case OpConvertBF16ToFINTEL: *hasResult = true; *hasResultType = true; break;
case OpControlBarrierArriveINTEL: *hasResult = false; *hasResultType = false; break;
case OpControlBarrierWaitINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpArithmeticFenceEXT: *hasResult = true; *hasResultType = true; break;
+ case OpTaskSequenceCreateINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpTaskSequenceAsyncINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpTaskSequenceGetINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpTaskSequenceReleaseINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpTypeTaskSequenceINTEL: *hasResult = true; *hasResultType = false; break;
+ case OpSubgroupBlockPrefetchINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroup2DBlockLoadINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroup2DBlockLoadTransformINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroup2DBlockLoadTransposeINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroup2DBlockPrefetchINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroup2DBlockStoreINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpSubgroupMatrixMultiplyAccumulateINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpBitwiseFunctionINTEL: *hasResult = true; *hasResultType = true; break;
case OpGroupIMulKHR: *hasResult = true; *hasResultType = true; break;
case OpGroupFMulKHR: *hasResult = true; *hasResultType = true; break;
case OpGroupBitwiseAndKHR: *hasResult = true; *hasResultType = true; break;
@@ -2579,22 +3229,2057 @@ inline void HasResultAndType(Op opcode, bool *hasResult, bool *hasResultType) {
case OpGroupLogicalAndKHR: *hasResult = true; *hasResultType = true; break;
case OpGroupLogicalOrKHR: *hasResult = true; *hasResultType = true; break;
case OpGroupLogicalXorKHR: *hasResult = true; *hasResultType = true; break;
+ case OpRoundFToTF32INTEL: *hasResult = true; *hasResultType = true; break;
+ case OpMaskedGatherINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpMaskedScatterINTEL: *hasResult = false; *hasResultType = false; break;
+ case OpConvertHandleToImageINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpConvertHandleToSamplerINTEL: *hasResult = true; *hasResultType = true; break;
+ case OpConvertHandleToSampledImageINTEL: *hasResult = true; *hasResultType = true; break;
}
}
+inline const char* SourceLanguageToString(SourceLanguage value) {
+ switch (value) {
+ case SourceLanguageUnknown: return "Unknown";
+ case SourceLanguageESSL: return "ESSL";
+ case SourceLanguageGLSL: return "GLSL";
+ case SourceLanguageOpenCL_C: return "OpenCL_C";
+ case SourceLanguageOpenCL_CPP: return "OpenCL_CPP";
+ case SourceLanguageHLSL: return "HLSL";
+ case SourceLanguageCPP_for_OpenCL: return "CPP_for_OpenCL";
+ case SourceLanguageSYCL: return "SYCL";
+ case SourceLanguageHERO_C: return "HERO_C";
+ case SourceLanguageNZSL: return "NZSL";
+ case SourceLanguageWGSL: return "WGSL";
+ case SourceLanguageSlang: return "Slang";
+ case SourceLanguageZig: return "Zig";
+ case SourceLanguageRust: return "Rust";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ExecutionModelToString(ExecutionModel value) {
+ switch (value) {
+ case ExecutionModelVertex: return "Vertex";
+ case ExecutionModelTessellationControl: return "TessellationControl";
+ case ExecutionModelTessellationEvaluation: return "TessellationEvaluation";
+ case ExecutionModelGeometry: return "Geometry";
+ case ExecutionModelFragment: return "Fragment";
+ case ExecutionModelGLCompute: return "GLCompute";
+ case ExecutionModelKernel: return "Kernel";
+ case ExecutionModelTaskNV: return "TaskNV";
+ case ExecutionModelMeshNV: return "MeshNV";
+ case ExecutionModelRayGenerationKHR: return "RayGenerationKHR";
+ case ExecutionModelIntersectionKHR: return "IntersectionKHR";
+ case ExecutionModelAnyHitKHR: return "AnyHitKHR";
+ case ExecutionModelClosestHitKHR: return "ClosestHitKHR";
+ case ExecutionModelMissKHR: return "MissKHR";
+ case ExecutionModelCallableKHR: return "CallableKHR";
+ case ExecutionModelTaskEXT: return "TaskEXT";
+ case ExecutionModelMeshEXT: return "MeshEXT";
+ default: return "Unknown";
+ }
+}
+
+inline const char* AddressingModelToString(AddressingModel value) {
+ switch (value) {
+ case AddressingModelLogical: return "Logical";
+ case AddressingModelPhysical32: return "Physical32";
+ case AddressingModelPhysical64: return "Physical64";
+ case AddressingModelPhysicalStorageBuffer64: return "PhysicalStorageBuffer64";
+ default: return "Unknown";
+ }
+}
+
+inline const char* MemoryModelToString(MemoryModel value) {
+ switch (value) {
+ case MemoryModelSimple: return "Simple";
+ case MemoryModelGLSL450: return "GLSL450";
+ case MemoryModelOpenCL: return "OpenCL";
+ case MemoryModelVulkan: return "Vulkan";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ExecutionModeToString(ExecutionMode value) {
+ switch (value) {
+ case ExecutionModeInvocations: return "Invocations";
+ case ExecutionModeSpacingEqual: return "SpacingEqual";
+ case ExecutionModeSpacingFractionalEven: return "SpacingFractionalEven";
+ case ExecutionModeSpacingFractionalOdd: return "SpacingFractionalOdd";
+ case ExecutionModeVertexOrderCw: return "VertexOrderCw";
+ case ExecutionModeVertexOrderCcw: return "VertexOrderCcw";
+ case ExecutionModePixelCenterInteger: return "PixelCenterInteger";
+ case ExecutionModeOriginUpperLeft: return "OriginUpperLeft";
+ case ExecutionModeOriginLowerLeft: return "OriginLowerLeft";
+ case ExecutionModeEarlyFragmentTests: return "EarlyFragmentTests";
+ case ExecutionModePointMode: return "PointMode";
+ case ExecutionModeXfb: return "Xfb";
+ case ExecutionModeDepthReplacing: return "DepthReplacing";
+ case ExecutionModeDepthGreater: return "DepthGreater";
+ case ExecutionModeDepthLess: return "DepthLess";
+ case ExecutionModeDepthUnchanged: return "DepthUnchanged";
+ case ExecutionModeLocalSize: return "LocalSize";
+ case ExecutionModeLocalSizeHint: return "LocalSizeHint";
+ case ExecutionModeInputPoints: return "InputPoints";
+ case ExecutionModeInputLines: return "InputLines";
+ case ExecutionModeInputLinesAdjacency: return "InputLinesAdjacency";
+ case ExecutionModeTriangles: return "Triangles";
+ case ExecutionModeInputTrianglesAdjacency: return "InputTrianglesAdjacency";
+ case ExecutionModeQuads: return "Quads";
+ case ExecutionModeIsolines: return "Isolines";
+ case ExecutionModeOutputVertices: return "OutputVertices";
+ case ExecutionModeOutputPoints: return "OutputPoints";
+ case ExecutionModeOutputLineStrip: return "OutputLineStrip";
+ case ExecutionModeOutputTriangleStrip: return "OutputTriangleStrip";
+ case ExecutionModeVecTypeHint: return "VecTypeHint";
+ case ExecutionModeContractionOff: return "ContractionOff";
+ case ExecutionModeInitializer: return "Initializer";
+ case ExecutionModeFinalizer: return "Finalizer";
+ case ExecutionModeSubgroupSize: return "SubgroupSize";
+ case ExecutionModeSubgroupsPerWorkgroup: return "SubgroupsPerWorkgroup";
+ case ExecutionModeSubgroupsPerWorkgroupId: return "SubgroupsPerWorkgroupId";
+ case ExecutionModeLocalSizeId: return "LocalSizeId";
+ case ExecutionModeLocalSizeHintId: return "LocalSizeHintId";
+ case ExecutionModeNonCoherentColorAttachmentReadEXT: return "NonCoherentColorAttachmentReadEXT";
+ case ExecutionModeNonCoherentDepthAttachmentReadEXT: return "NonCoherentDepthAttachmentReadEXT";
+ case ExecutionModeNonCoherentStencilAttachmentReadEXT: return "NonCoherentStencilAttachmentReadEXT";
+ case ExecutionModeSubgroupUniformControlFlowKHR: return "SubgroupUniformControlFlowKHR";
+ case ExecutionModePostDepthCoverage: return "PostDepthCoverage";
+ case ExecutionModeDenormPreserve: return "DenormPreserve";
+ case ExecutionModeDenormFlushToZero: return "DenormFlushToZero";
+ case ExecutionModeSignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
+ case ExecutionModeRoundingModeRTE: return "RoundingModeRTE";
+ case ExecutionModeRoundingModeRTZ: return "RoundingModeRTZ";
+ case ExecutionModeNonCoherentTileAttachmentReadQCOM: return "NonCoherentTileAttachmentReadQCOM";
+ case ExecutionModeTileShadingRateQCOM: return "TileShadingRateQCOM";
+ case ExecutionModeEarlyAndLateFragmentTestsAMD: return "EarlyAndLateFragmentTestsAMD";
+ case ExecutionModeStencilRefReplacingEXT: return "StencilRefReplacingEXT";
+ case ExecutionModeCoalescingAMDX: return "CoalescingAMDX";
+ case ExecutionModeIsApiEntryAMDX: return "IsApiEntryAMDX";
+ case ExecutionModeMaxNodeRecursionAMDX: return "MaxNodeRecursionAMDX";
+ case ExecutionModeStaticNumWorkgroupsAMDX: return "StaticNumWorkgroupsAMDX";
+ case ExecutionModeShaderIndexAMDX: return "ShaderIndexAMDX";
+ case ExecutionModeMaxNumWorkgroupsAMDX: return "MaxNumWorkgroupsAMDX";
+ case ExecutionModeStencilRefUnchangedFrontAMD: return "StencilRefUnchangedFrontAMD";
+ case ExecutionModeStencilRefGreaterFrontAMD: return "StencilRefGreaterFrontAMD";
+ case ExecutionModeStencilRefLessFrontAMD: return "StencilRefLessFrontAMD";
+ case ExecutionModeStencilRefUnchangedBackAMD: return "StencilRefUnchangedBackAMD";
+ case ExecutionModeStencilRefGreaterBackAMD: return "StencilRefGreaterBackAMD";
+ case ExecutionModeStencilRefLessBackAMD: return "StencilRefLessBackAMD";
+ case ExecutionModeQuadDerivativesKHR: return "QuadDerivativesKHR";
+ case ExecutionModeRequireFullQuadsKHR: return "RequireFullQuadsKHR";
+ case ExecutionModeSharesInputWithAMDX: return "SharesInputWithAMDX";
+ case ExecutionModeOutputLinesEXT: return "OutputLinesEXT";
+ case ExecutionModeOutputPrimitivesEXT: return "OutputPrimitivesEXT";
+ case ExecutionModeDerivativeGroupQuadsKHR: return "DerivativeGroupQuadsKHR";
+ case ExecutionModeDerivativeGroupLinearKHR: return "DerivativeGroupLinearKHR";
+ case ExecutionModeOutputTrianglesEXT: return "OutputTrianglesEXT";
+ case ExecutionModePixelInterlockOrderedEXT: return "PixelInterlockOrderedEXT";
+ case ExecutionModePixelInterlockUnorderedEXT: return "PixelInterlockUnorderedEXT";
+ case ExecutionModeSampleInterlockOrderedEXT: return "SampleInterlockOrderedEXT";
+ case ExecutionModeSampleInterlockUnorderedEXT: return "SampleInterlockUnorderedEXT";
+ case ExecutionModeShadingRateInterlockOrderedEXT: return "ShadingRateInterlockOrderedEXT";
+ case ExecutionModeShadingRateInterlockUnorderedEXT: return "ShadingRateInterlockUnorderedEXT";
+ case ExecutionModeSharedLocalMemorySizeINTEL: return "SharedLocalMemorySizeINTEL";
+ case ExecutionModeRoundingModeRTPINTEL: return "RoundingModeRTPINTEL";
+ case ExecutionModeRoundingModeRTNINTEL: return "RoundingModeRTNINTEL";
+ case ExecutionModeFloatingPointModeALTINTEL: return "FloatingPointModeALTINTEL";
+ case ExecutionModeFloatingPointModeIEEEINTEL: return "FloatingPointModeIEEEINTEL";
+ case ExecutionModeMaxWorkgroupSizeINTEL: return "MaxWorkgroupSizeINTEL";
+ case ExecutionModeMaxWorkDimINTEL: return "MaxWorkDimINTEL";
+ case ExecutionModeNoGlobalOffsetINTEL: return "NoGlobalOffsetINTEL";
+ case ExecutionModeNumSIMDWorkitemsINTEL: return "NumSIMDWorkitemsINTEL";
+ case ExecutionModeSchedulerTargetFmaxMhzINTEL: return "SchedulerTargetFmaxMhzINTEL";
+ case ExecutionModeMaximallyReconvergesKHR: return "MaximallyReconvergesKHR";
+ case ExecutionModeFPFastMathDefault: return "FPFastMathDefault";
+ case ExecutionModeStreamingInterfaceINTEL: return "StreamingInterfaceINTEL";
+ case ExecutionModeRegisterMapInterfaceINTEL: return "RegisterMapInterfaceINTEL";
+ case ExecutionModeNamedBarrierCountINTEL: return "NamedBarrierCountINTEL";
+ case ExecutionModeMaximumRegistersINTEL: return "MaximumRegistersINTEL";
+ case ExecutionModeMaximumRegistersIdINTEL: return "MaximumRegistersIdINTEL";
+ case ExecutionModeNamedMaximumRegistersINTEL: return "NamedMaximumRegistersINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* StorageClassToString(StorageClass value) {
+ switch (value) {
+ case StorageClassUniformConstant: return "UniformConstant";
+ case StorageClassInput: return "Input";
+ case StorageClassUniform: return "Uniform";
+ case StorageClassOutput: return "Output";
+ case StorageClassWorkgroup: return "Workgroup";
+ case StorageClassCrossWorkgroup: return "CrossWorkgroup";
+ case StorageClassPrivate: return "Private";
+ case StorageClassFunction: return "Function";
+ case StorageClassGeneric: return "Generic";
+ case StorageClassPushConstant: return "PushConstant";
+ case StorageClassAtomicCounter: return "AtomicCounter";
+ case StorageClassImage: return "Image";
+ case StorageClassStorageBuffer: return "StorageBuffer";
+ case StorageClassTileImageEXT: return "TileImageEXT";
+ case StorageClassTileAttachmentQCOM: return "TileAttachmentQCOM";
+ case StorageClassNodePayloadAMDX: return "NodePayloadAMDX";
+ case StorageClassCallableDataKHR: return "CallableDataKHR";
+ case StorageClassIncomingCallableDataKHR: return "IncomingCallableDataKHR";
+ case StorageClassRayPayloadKHR: return "RayPayloadKHR";
+ case StorageClassHitAttributeKHR: return "HitAttributeKHR";
+ case StorageClassIncomingRayPayloadKHR: return "IncomingRayPayloadKHR";
+ case StorageClassShaderRecordBufferKHR: return "ShaderRecordBufferKHR";
+ case StorageClassPhysicalStorageBuffer: return "PhysicalStorageBuffer";
+ case StorageClassHitObjectAttributeNV: return "HitObjectAttributeNV";
+ case StorageClassTaskPayloadWorkgroupEXT: return "TaskPayloadWorkgroupEXT";
+ case StorageClassCodeSectionINTEL: return "CodeSectionINTEL";
+ case StorageClassDeviceOnlyINTEL: return "DeviceOnlyINTEL";
+ case StorageClassHostOnlyINTEL: return "HostOnlyINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* DimToString(Dim value) {
+ switch (value) {
+ case Dim1D: return "1D";
+ case Dim2D: return "2D";
+ case Dim3D: return "3D";
+ case DimCube: return "Cube";
+ case DimRect: return "Rect";
+ case DimBuffer: return "Buffer";
+ case DimSubpassData: return "SubpassData";
+ case DimTileImageDataEXT: return "TileImageDataEXT";
+ default: return "Unknown";
+ }
+}
+
+inline const char* SamplerAddressingModeToString(SamplerAddressingMode value) {
+ switch (value) {
+ case SamplerAddressingModeNone: return "None";
+ case SamplerAddressingModeClampToEdge: return "ClampToEdge";
+ case SamplerAddressingModeClamp: return "Clamp";
+ case SamplerAddressingModeRepeat: return "Repeat";
+ case SamplerAddressingModeRepeatMirrored: return "RepeatMirrored";
+ default: return "Unknown";
+ }
+}
+
+inline const char* SamplerFilterModeToString(SamplerFilterMode value) {
+ switch (value) {
+ case SamplerFilterModeNearest: return "Nearest";
+ case SamplerFilterModeLinear: return "Linear";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ImageFormatToString(ImageFormat value) {
+ switch (value) {
+ case ImageFormatUnknown: return "Unknown";
+ case ImageFormatRgba32f: return "Rgba32f";
+ case ImageFormatRgba16f: return "Rgba16f";
+ case ImageFormatR32f: return "R32f";
+ case ImageFormatRgba8: return "Rgba8";
+ case ImageFormatRgba8Snorm: return "Rgba8Snorm";
+ case ImageFormatRg32f: return "Rg32f";
+ case ImageFormatRg16f: return "Rg16f";
+ case ImageFormatR11fG11fB10f: return "R11fG11fB10f";
+ case ImageFormatR16f: return "R16f";
+ case ImageFormatRgba16: return "Rgba16";
+ case ImageFormatRgb10A2: return "Rgb10A2";
+ case ImageFormatRg16: return "Rg16";
+ case ImageFormatRg8: return "Rg8";
+ case ImageFormatR16: return "R16";
+ case ImageFormatR8: return "R8";
+ case ImageFormatRgba16Snorm: return "Rgba16Snorm";
+ case ImageFormatRg16Snorm: return "Rg16Snorm";
+ case ImageFormatRg8Snorm: return "Rg8Snorm";
+ case ImageFormatR16Snorm: return "R16Snorm";
+ case ImageFormatR8Snorm: return "R8Snorm";
+ case ImageFormatRgba32i: return "Rgba32i";
+ case ImageFormatRgba16i: return "Rgba16i";
+ case ImageFormatRgba8i: return "Rgba8i";
+ case ImageFormatR32i: return "R32i";
+ case ImageFormatRg32i: return "Rg32i";
+ case ImageFormatRg16i: return "Rg16i";
+ case ImageFormatRg8i: return "Rg8i";
+ case ImageFormatR16i: return "R16i";
+ case ImageFormatR8i: return "R8i";
+ case ImageFormatRgba32ui: return "Rgba32ui";
+ case ImageFormatRgba16ui: return "Rgba16ui";
+ case ImageFormatRgba8ui: return "Rgba8ui";
+ case ImageFormatR32ui: return "R32ui";
+ case ImageFormatRgb10a2ui: return "Rgb10a2ui";
+ case ImageFormatRg32ui: return "Rg32ui";
+ case ImageFormatRg16ui: return "Rg16ui";
+ case ImageFormatRg8ui: return "Rg8ui";
+ case ImageFormatR16ui: return "R16ui";
+ case ImageFormatR8ui: return "R8ui";
+ case ImageFormatR64ui: return "R64ui";
+ case ImageFormatR64i: return "R64i";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ImageChannelOrderToString(ImageChannelOrder value) {
+ switch (value) {
+ case ImageChannelOrderR: return "R";
+ case ImageChannelOrderA: return "A";
+ case ImageChannelOrderRG: return "RG";
+ case ImageChannelOrderRA: return "RA";
+ case ImageChannelOrderRGB: return "RGB";
+ case ImageChannelOrderRGBA: return "RGBA";
+ case ImageChannelOrderBGRA: return "BGRA";
+ case ImageChannelOrderARGB: return "ARGB";
+ case ImageChannelOrderIntensity: return "Intensity";
+ case ImageChannelOrderLuminance: return "Luminance";
+ case ImageChannelOrderRx: return "Rx";
+ case ImageChannelOrderRGx: return "RGx";
+ case ImageChannelOrderRGBx: return "RGBx";
+ case ImageChannelOrderDepth: return "Depth";
+ case ImageChannelOrderDepthStencil: return "DepthStencil";
+ case ImageChannelOrdersRGB: return "sRGB";
+ case ImageChannelOrdersRGBx: return "sRGBx";
+ case ImageChannelOrdersRGBA: return "sRGBA";
+ case ImageChannelOrdersBGRA: return "sBGRA";
+ case ImageChannelOrderABGR: return "ABGR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ImageChannelDataTypeToString(ImageChannelDataType value) {
+ switch (value) {
+ case ImageChannelDataTypeSnormInt8: return "SnormInt8";
+ case ImageChannelDataTypeSnormInt16: return "SnormInt16";
+ case ImageChannelDataTypeUnormInt8: return "UnormInt8";
+ case ImageChannelDataTypeUnormInt16: return "UnormInt16";
+ case ImageChannelDataTypeUnormShort565: return "UnormShort565";
+ case ImageChannelDataTypeUnormShort555: return "UnormShort555";
+ case ImageChannelDataTypeUnormInt101010: return "UnormInt101010";
+ case ImageChannelDataTypeSignedInt8: return "SignedInt8";
+ case ImageChannelDataTypeSignedInt16: return "SignedInt16";
+ case ImageChannelDataTypeSignedInt32: return "SignedInt32";
+ case ImageChannelDataTypeUnsignedInt8: return "UnsignedInt8";
+ case ImageChannelDataTypeUnsignedInt16: return "UnsignedInt16";
+ case ImageChannelDataTypeUnsignedInt32: return "UnsignedInt32";
+ case ImageChannelDataTypeHalfFloat: return "HalfFloat";
+ case ImageChannelDataTypeFloat: return "Float";
+ case ImageChannelDataTypeUnormInt24: return "UnormInt24";
+ case ImageChannelDataTypeUnormInt101010_2: return "UnormInt101010_2";
+ case ImageChannelDataTypeUnormInt10X6EXT: return "UnormInt10X6EXT";
+ case ImageChannelDataTypeUnsignedIntRaw10EXT: return "UnsignedIntRaw10EXT";
+ case ImageChannelDataTypeUnsignedIntRaw12EXT: return "UnsignedIntRaw12EXT";
+ case ImageChannelDataTypeUnormInt2_101010EXT: return "UnormInt2_101010EXT";
+ case ImageChannelDataTypeUnsignedInt10X6EXT: return "UnsignedInt10X6EXT";
+ case ImageChannelDataTypeUnsignedInt12X4EXT: return "UnsignedInt12X4EXT";
+ case ImageChannelDataTypeUnsignedInt14X2EXT: return "UnsignedInt14X2EXT";
+ case ImageChannelDataTypeUnormInt12X4EXT: return "UnormInt12X4EXT";
+ case ImageChannelDataTypeUnormInt14X2EXT: return "UnormInt14X2EXT";
+ default: return "Unknown";
+ }
+}
+
+inline const char* FPRoundingModeToString(FPRoundingMode value) {
+ switch (value) {
+ case FPRoundingModeRTE: return "RTE";
+ case FPRoundingModeRTZ: return "RTZ";
+ case FPRoundingModeRTP: return "RTP";
+ case FPRoundingModeRTN: return "RTN";
+ default: return "Unknown";
+ }
+}
+
+inline const char* LinkageTypeToString(LinkageType value) {
+ switch (value) {
+ case LinkageTypeExport: return "Export";
+ case LinkageTypeImport: return "Import";
+ case LinkageTypeLinkOnceODR: return "LinkOnceODR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* AccessQualifierToString(AccessQualifier value) {
+ switch (value) {
+ case AccessQualifierReadOnly: return "ReadOnly";
+ case AccessQualifierWriteOnly: return "WriteOnly";
+ case AccessQualifierReadWrite: return "ReadWrite";
+ default: return "Unknown";
+ }
+}
+
+inline const char* FunctionParameterAttributeToString(FunctionParameterAttribute value) {
+ switch (value) {
+ case FunctionParameterAttributeZext: return "Zext";
+ case FunctionParameterAttributeSext: return "Sext";
+ case FunctionParameterAttributeByVal: return "ByVal";
+ case FunctionParameterAttributeSret: return "Sret";
+ case FunctionParameterAttributeNoAlias: return "NoAlias";
+ case FunctionParameterAttributeNoCapture: return "NoCapture";
+ case FunctionParameterAttributeNoWrite: return "NoWrite";
+ case FunctionParameterAttributeNoReadWrite: return "NoReadWrite";
+ case FunctionParameterAttributeRuntimeAlignedINTEL: return "RuntimeAlignedINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* DecorationToString(Decoration value) {
+ switch (value) {
+ case DecorationRelaxedPrecision: return "RelaxedPrecision";
+ case DecorationSpecId: return "SpecId";
+ case DecorationBlock: return "Block";
+ case DecorationBufferBlock: return "BufferBlock";
+ case DecorationRowMajor: return "RowMajor";
+ case DecorationColMajor: return "ColMajor";
+ case DecorationArrayStride: return "ArrayStride";
+ case DecorationMatrixStride: return "MatrixStride";
+ case DecorationGLSLShared: return "GLSLShared";
+ case DecorationGLSLPacked: return "GLSLPacked";
+ case DecorationCPacked: return "CPacked";
+ case DecorationBuiltIn: return "BuiltIn";
+ case DecorationNoPerspective: return "NoPerspective";
+ case DecorationFlat: return "Flat";
+ case DecorationPatch: return "Patch";
+ case DecorationCentroid: return "Centroid";
+ case DecorationSample: return "Sample";
+ case DecorationInvariant: return "Invariant";
+ case DecorationRestrict: return "Restrict";
+ case DecorationAliased: return "Aliased";
+ case DecorationVolatile: return "Volatile";
+ case DecorationConstant: return "Constant";
+ case DecorationCoherent: return "Coherent";
+ case DecorationNonWritable: return "NonWritable";
+ case DecorationNonReadable: return "NonReadable";
+ case DecorationUniform: return "Uniform";
+ case DecorationUniformId: return "UniformId";
+ case DecorationSaturatedConversion: return "SaturatedConversion";
+ case DecorationStream: return "Stream";
+ case DecorationLocation: return "Location";
+ case DecorationComponent: return "Component";
+ case DecorationIndex: return "Index";
+ case DecorationBinding: return "Binding";
+ case DecorationDescriptorSet: return "DescriptorSet";
+ case DecorationOffset: return "Offset";
+ case DecorationXfbBuffer: return "XfbBuffer";
+ case DecorationXfbStride: return "XfbStride";
+ case DecorationFuncParamAttr: return "FuncParamAttr";
+ case DecorationFPRoundingMode: return "FPRoundingMode";
+ case DecorationFPFastMathMode: return "FPFastMathMode";
+ case DecorationLinkageAttributes: return "LinkageAttributes";
+ case DecorationNoContraction: return "NoContraction";
+ case DecorationInputAttachmentIndex: return "InputAttachmentIndex";
+ case DecorationAlignment: return "Alignment";
+ case DecorationMaxByteOffset: return "MaxByteOffset";
+ case DecorationAlignmentId: return "AlignmentId";
+ case DecorationMaxByteOffsetId: return "MaxByteOffsetId";
+ case DecorationSaturatedToLargestFloat8NormalConversionEXT: return "SaturatedToLargestFloat8NormalConversionEXT";
+ case DecorationNoSignedWrap: return "NoSignedWrap";
+ case DecorationNoUnsignedWrap: return "NoUnsignedWrap";
+ case DecorationWeightTextureQCOM: return "WeightTextureQCOM";
+ case DecorationBlockMatchTextureQCOM: return "BlockMatchTextureQCOM";
+ case DecorationBlockMatchSamplerQCOM: return "BlockMatchSamplerQCOM";
+ case DecorationExplicitInterpAMD: return "ExplicitInterpAMD";
+ case DecorationNodeSharesPayloadLimitsWithAMDX: return "NodeSharesPayloadLimitsWithAMDX";
+ case DecorationNodeMaxPayloadsAMDX: return "NodeMaxPayloadsAMDX";
+ case DecorationTrackFinishWritingAMDX: return "TrackFinishWritingAMDX";
+ case DecorationPayloadNodeNameAMDX: return "PayloadNodeNameAMDX";
+ case DecorationPayloadNodeBaseIndexAMDX: return "PayloadNodeBaseIndexAMDX";
+ case DecorationPayloadNodeSparseArrayAMDX: return "PayloadNodeSparseArrayAMDX";
+ case DecorationPayloadNodeArraySizeAMDX: return "PayloadNodeArraySizeAMDX";
+ case DecorationPayloadDispatchIndirectAMDX: return "PayloadDispatchIndirectAMDX";
+ case DecorationOverrideCoverageNV: return "OverrideCoverageNV";
+ case DecorationPassthroughNV: return "PassthroughNV";
+ case DecorationViewportRelativeNV: return "ViewportRelativeNV";
+ case DecorationSecondaryViewportRelativeNV: return "SecondaryViewportRelativeNV";
+ case DecorationPerPrimitiveEXT: return "PerPrimitiveEXT";
+ case DecorationPerViewNV: return "PerViewNV";
+ case DecorationPerTaskNV: return "PerTaskNV";
+ case DecorationPerVertexKHR: return "PerVertexKHR";
+ case DecorationNonUniform: return "NonUniform";
+ case DecorationRestrictPointer: return "RestrictPointer";
+ case DecorationAliasedPointer: return "AliasedPointer";
+ case DecorationHitObjectShaderRecordBufferNV: return "HitObjectShaderRecordBufferNV";
+ case DecorationBindlessSamplerNV: return "BindlessSamplerNV";
+ case DecorationBindlessImageNV: return "BindlessImageNV";
+ case DecorationBoundSamplerNV: return "BoundSamplerNV";
+ case DecorationBoundImageNV: return "BoundImageNV";
+ case DecorationSIMTCallINTEL: return "SIMTCallINTEL";
+ case DecorationReferencedIndirectlyINTEL: return "ReferencedIndirectlyINTEL";
+ case DecorationClobberINTEL: return "ClobberINTEL";
+ case DecorationSideEffectsINTEL: return "SideEffectsINTEL";
+ case DecorationVectorComputeVariableINTEL: return "VectorComputeVariableINTEL";
+ case DecorationFuncParamIOKindINTEL: return "FuncParamIOKindINTEL";
+ case DecorationVectorComputeFunctionINTEL: return "VectorComputeFunctionINTEL";
+ case DecorationStackCallINTEL: return "StackCallINTEL";
+ case DecorationGlobalVariableOffsetINTEL: return "GlobalVariableOffsetINTEL";
+ case DecorationCounterBuffer: return "CounterBuffer";
+ case DecorationHlslSemanticGOOGLE: return "HlslSemanticGOOGLE";
+ case DecorationUserTypeGOOGLE: return "UserTypeGOOGLE";
+ case DecorationFunctionRoundingModeINTEL: return "FunctionRoundingModeINTEL";
+ case DecorationFunctionDenormModeINTEL: return "FunctionDenormModeINTEL";
+ case DecorationRegisterINTEL: return "RegisterINTEL";
+ case DecorationMemoryINTEL: return "MemoryINTEL";
+ case DecorationNumbanksINTEL: return "NumbanksINTEL";
+ case DecorationBankwidthINTEL: return "BankwidthINTEL";
+ case DecorationMaxPrivateCopiesINTEL: return "MaxPrivateCopiesINTEL";
+ case DecorationSinglepumpINTEL: return "SinglepumpINTEL";
+ case DecorationDoublepumpINTEL: return "DoublepumpINTEL";
+ case DecorationMaxReplicatesINTEL: return "MaxReplicatesINTEL";
+ case DecorationSimpleDualPortINTEL: return "SimpleDualPortINTEL";
+ case DecorationMergeINTEL: return "MergeINTEL";
+ case DecorationBankBitsINTEL: return "BankBitsINTEL";
+ case DecorationForcePow2DepthINTEL: return "ForcePow2DepthINTEL";
+ case DecorationStridesizeINTEL: return "StridesizeINTEL";
+ case DecorationWordsizeINTEL: return "WordsizeINTEL";
+ case DecorationTrueDualPortINTEL: return "TrueDualPortINTEL";
+ case DecorationBurstCoalesceINTEL: return "BurstCoalesceINTEL";
+ case DecorationCacheSizeINTEL: return "CacheSizeINTEL";
+ case DecorationDontStaticallyCoalesceINTEL: return "DontStaticallyCoalesceINTEL";
+ case DecorationPrefetchINTEL: return "PrefetchINTEL";
+ case DecorationStallEnableINTEL: return "StallEnableINTEL";
+ case DecorationFuseLoopsInFunctionINTEL: return "FuseLoopsInFunctionINTEL";
+ case DecorationMathOpDSPModeINTEL: return "MathOpDSPModeINTEL";
+ case DecorationAliasScopeINTEL: return "AliasScopeINTEL";
+ case DecorationNoAliasINTEL: return "NoAliasINTEL";
+ case DecorationInitiationIntervalINTEL: return "InitiationIntervalINTEL";
+ case DecorationMaxConcurrencyINTEL: return "MaxConcurrencyINTEL";
+ case DecorationPipelineEnableINTEL: return "PipelineEnableINTEL";
+ case DecorationBufferLocationINTEL: return "BufferLocationINTEL";
+ case DecorationIOPipeStorageINTEL: return "IOPipeStorageINTEL";
+ case DecorationFunctionFloatingPointModeINTEL: return "FunctionFloatingPointModeINTEL";
+ case DecorationSingleElementVectorINTEL: return "SingleElementVectorINTEL";
+ case DecorationVectorComputeCallableFunctionINTEL: return "VectorComputeCallableFunctionINTEL";
+ case DecorationMediaBlockIOINTEL: return "MediaBlockIOINTEL";
+ case DecorationStallFreeINTEL: return "StallFreeINTEL";
+ case DecorationFPMaxErrorDecorationINTEL: return "FPMaxErrorDecorationINTEL";
+ case DecorationLatencyControlLabelINTEL: return "LatencyControlLabelINTEL";
+ case DecorationLatencyControlConstraintINTEL: return "LatencyControlConstraintINTEL";
+ case DecorationConduitKernelArgumentINTEL: return "ConduitKernelArgumentINTEL";
+ case DecorationRegisterMapKernelArgumentINTEL: return "RegisterMapKernelArgumentINTEL";
+ case DecorationMMHostInterfaceAddressWidthINTEL: return "MMHostInterfaceAddressWidthINTEL";
+ case DecorationMMHostInterfaceDataWidthINTEL: return "MMHostInterfaceDataWidthINTEL";
+ case DecorationMMHostInterfaceLatencyINTEL: return "MMHostInterfaceLatencyINTEL";
+ case DecorationMMHostInterfaceReadWriteModeINTEL: return "MMHostInterfaceReadWriteModeINTEL";
+ case DecorationMMHostInterfaceMaxBurstINTEL: return "MMHostInterfaceMaxBurstINTEL";
+ case DecorationMMHostInterfaceWaitRequestINTEL: return "MMHostInterfaceWaitRequestINTEL";
+ case DecorationStableKernelArgumentINTEL: return "StableKernelArgumentINTEL";
+ case DecorationHostAccessINTEL: return "HostAccessINTEL";
+ case DecorationInitModeINTEL: return "InitModeINTEL";
+ case DecorationImplementInRegisterMapINTEL: return "ImplementInRegisterMapINTEL";
+ case DecorationCacheControlLoadINTEL: return "CacheControlLoadINTEL";
+ case DecorationCacheControlStoreINTEL: return "CacheControlStoreINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* BuiltInToString(BuiltIn value) {
+ switch (value) {
+ case BuiltInPosition: return "Position";
+ case BuiltInPointSize: return "PointSize";
+ case BuiltInClipDistance: return "ClipDistance";
+ case BuiltInCullDistance: return "CullDistance";
+ case BuiltInVertexId: return "VertexId";
+ case BuiltInInstanceId: return "InstanceId";
+ case BuiltInPrimitiveId: return "PrimitiveId";
+ case BuiltInInvocationId: return "InvocationId";
+ case BuiltInLayer: return "Layer";
+ case BuiltInViewportIndex: return "ViewportIndex";
+ case BuiltInTessLevelOuter: return "TessLevelOuter";
+ case BuiltInTessLevelInner: return "TessLevelInner";
+ case BuiltInTessCoord: return "TessCoord";
+ case BuiltInPatchVertices: return "PatchVertices";
+ case BuiltInFragCoord: return "FragCoord";
+ case BuiltInPointCoord: return "PointCoord";
+ case BuiltInFrontFacing: return "FrontFacing";
+ case BuiltInSampleId: return "SampleId";
+ case BuiltInSamplePosition: return "SamplePosition";
+ case BuiltInSampleMask: return "SampleMask";
+ case BuiltInFragDepth: return "FragDepth";
+ case BuiltInHelperInvocation: return "HelperInvocation";
+ case BuiltInNumWorkgroups: return "NumWorkgroups";
+ case BuiltInWorkgroupSize: return "WorkgroupSize";
+ case BuiltInWorkgroupId: return "WorkgroupId";
+ case BuiltInLocalInvocationId: return "LocalInvocationId";
+ case BuiltInGlobalInvocationId: return "GlobalInvocationId";
+ case BuiltInLocalInvocationIndex: return "LocalInvocationIndex";
+ case BuiltInWorkDim: return "WorkDim";
+ case BuiltInGlobalSize: return "GlobalSize";
+ case BuiltInEnqueuedWorkgroupSize: return "EnqueuedWorkgroupSize";
+ case BuiltInGlobalOffset: return "GlobalOffset";
+ case BuiltInGlobalLinearId: return "GlobalLinearId";
+ case BuiltInSubgroupSize: return "SubgroupSize";
+ case BuiltInSubgroupMaxSize: return "SubgroupMaxSize";
+ case BuiltInNumSubgroups: return "NumSubgroups";
+ case BuiltInNumEnqueuedSubgroups: return "NumEnqueuedSubgroups";
+ case BuiltInSubgroupId: return "SubgroupId";
+ case BuiltInSubgroupLocalInvocationId: return "SubgroupLocalInvocationId";
+ case BuiltInVertexIndex: return "VertexIndex";
+ case BuiltInInstanceIndex: return "InstanceIndex";
+ case BuiltInCoreIDARM: return "CoreIDARM";
+ case BuiltInCoreCountARM: return "CoreCountARM";
+ case BuiltInCoreMaxIDARM: return "CoreMaxIDARM";
+ case BuiltInWarpIDARM: return "WarpIDARM";
+ case BuiltInWarpMaxIDARM: return "WarpMaxIDARM";
+ case BuiltInSubgroupEqMask: return "SubgroupEqMask";
+ case BuiltInSubgroupGeMask: return "SubgroupGeMask";
+ case BuiltInSubgroupGtMask: return "SubgroupGtMask";
+ case BuiltInSubgroupLeMask: return "SubgroupLeMask";
+ case BuiltInSubgroupLtMask: return "SubgroupLtMask";
+ case BuiltInBaseVertex: return "BaseVertex";
+ case BuiltInBaseInstance: return "BaseInstance";
+ case BuiltInDrawIndex: return "DrawIndex";
+ case BuiltInPrimitiveShadingRateKHR: return "PrimitiveShadingRateKHR";
+ case BuiltInDeviceIndex: return "DeviceIndex";
+ case BuiltInViewIndex: return "ViewIndex";
+ case BuiltInShadingRateKHR: return "ShadingRateKHR";
+ case BuiltInTileOffsetQCOM: return "TileOffsetQCOM";
+ case BuiltInTileDimensionQCOM: return "TileDimensionQCOM";
+ case BuiltInTileApronSizeQCOM: return "TileApronSizeQCOM";
+ case BuiltInBaryCoordNoPerspAMD: return "BaryCoordNoPerspAMD";
+ case BuiltInBaryCoordNoPerspCentroidAMD: return "BaryCoordNoPerspCentroidAMD";
+ case BuiltInBaryCoordNoPerspSampleAMD: return "BaryCoordNoPerspSampleAMD";
+ case BuiltInBaryCoordSmoothAMD: return "BaryCoordSmoothAMD";
+ case BuiltInBaryCoordSmoothCentroidAMD: return "BaryCoordSmoothCentroidAMD";
+ case BuiltInBaryCoordSmoothSampleAMD: return "BaryCoordSmoothSampleAMD";
+ case BuiltInBaryCoordPullModelAMD: return "BaryCoordPullModelAMD";
+ case BuiltInFragStencilRefEXT: return "FragStencilRefEXT";
+ case BuiltInRemainingRecursionLevelsAMDX: return "RemainingRecursionLevelsAMDX";
+ case BuiltInShaderIndexAMDX: return "ShaderIndexAMDX";
+ case BuiltInViewportMaskNV: return "ViewportMaskNV";
+ case BuiltInSecondaryPositionNV: return "SecondaryPositionNV";
+ case BuiltInSecondaryViewportMaskNV: return "SecondaryViewportMaskNV";
+ case BuiltInPositionPerViewNV: return "PositionPerViewNV";
+ case BuiltInViewportMaskPerViewNV: return "ViewportMaskPerViewNV";
+ case BuiltInFullyCoveredEXT: return "FullyCoveredEXT";
+ case BuiltInTaskCountNV: return "TaskCountNV";
+ case BuiltInPrimitiveCountNV: return "PrimitiveCountNV";
+ case BuiltInPrimitiveIndicesNV: return "PrimitiveIndicesNV";
+ case BuiltInClipDistancePerViewNV: return "ClipDistancePerViewNV";
+ case BuiltInCullDistancePerViewNV: return "CullDistancePerViewNV";
+ case BuiltInLayerPerViewNV: return "LayerPerViewNV";
+ case BuiltInMeshViewCountNV: return "MeshViewCountNV";
+ case BuiltInMeshViewIndicesNV: return "MeshViewIndicesNV";
+ case BuiltInBaryCoordKHR: return "BaryCoordKHR";
+ case BuiltInBaryCoordNoPerspKHR: return "BaryCoordNoPerspKHR";
+ case BuiltInFragSizeEXT: return "FragSizeEXT";
+ case BuiltInFragInvocationCountEXT: return "FragInvocationCountEXT";
+ case BuiltInPrimitivePointIndicesEXT: return "PrimitivePointIndicesEXT";
+ case BuiltInPrimitiveLineIndicesEXT: return "PrimitiveLineIndicesEXT";
+ case BuiltInPrimitiveTriangleIndicesEXT: return "PrimitiveTriangleIndicesEXT";
+ case BuiltInCullPrimitiveEXT: return "CullPrimitiveEXT";
+ case BuiltInLaunchIdKHR: return "LaunchIdKHR";
+ case BuiltInLaunchSizeKHR: return "LaunchSizeKHR";
+ case BuiltInWorldRayOriginKHR: return "WorldRayOriginKHR";
+ case BuiltInWorldRayDirectionKHR: return "WorldRayDirectionKHR";
+ case BuiltInObjectRayOriginKHR: return "ObjectRayOriginKHR";
+ case BuiltInObjectRayDirectionKHR: return "ObjectRayDirectionKHR";
+ case BuiltInRayTminKHR: return "RayTminKHR";
+ case BuiltInRayTmaxKHR: return "RayTmaxKHR";
+ case BuiltInInstanceCustomIndexKHR: return "InstanceCustomIndexKHR";
+ case BuiltInObjectToWorldKHR: return "ObjectToWorldKHR";
+ case BuiltInWorldToObjectKHR: return "WorldToObjectKHR";
+ case BuiltInHitTNV: return "HitTNV";
+ case BuiltInHitKindKHR: return "HitKindKHR";
+ case BuiltInCurrentRayTimeNV: return "CurrentRayTimeNV";
+ case BuiltInHitTriangleVertexPositionsKHR: return "HitTriangleVertexPositionsKHR";
+ case BuiltInHitMicroTriangleVertexPositionsNV: return "HitMicroTriangleVertexPositionsNV";
+ case BuiltInHitMicroTriangleVertexBarycentricsNV: return "HitMicroTriangleVertexBarycentricsNV";
+ case BuiltInIncomingRayFlagsKHR: return "IncomingRayFlagsKHR";
+ case BuiltInRayGeometryIndexKHR: return "RayGeometryIndexKHR";
+ case BuiltInHitIsSphereNV: return "HitIsSphereNV";
+ case BuiltInHitIsLSSNV: return "HitIsLSSNV";
+ case BuiltInHitSpherePositionNV: return "HitSpherePositionNV";
+ case BuiltInWarpsPerSMNV: return "WarpsPerSMNV";
+ case BuiltInSMCountNV: return "SMCountNV";
+ case BuiltInWarpIDNV: return "WarpIDNV";
+ case BuiltInSMIDNV: return "SMIDNV";
+ case BuiltInHitLSSPositionsNV: return "HitLSSPositionsNV";
+ case BuiltInHitKindFrontFacingMicroTriangleNV: return "HitKindFrontFacingMicroTriangleNV";
+ case BuiltInHitKindBackFacingMicroTriangleNV: return "HitKindBackFacingMicroTriangleNV";
+ case BuiltInHitSphereRadiusNV: return "HitSphereRadiusNV";
+ case BuiltInHitLSSRadiiNV: return "HitLSSRadiiNV";
+ case BuiltInClusterIDNV: return "ClusterIDNV";
+ case BuiltInCullMaskKHR: return "CullMaskKHR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ScopeToString(Scope value) {
+ switch (value) {
+ case ScopeCrossDevice: return "CrossDevice";
+ case ScopeDevice: return "Device";
+ case ScopeWorkgroup: return "Workgroup";
+ case ScopeSubgroup: return "Subgroup";
+ case ScopeInvocation: return "Invocation";
+ case ScopeQueueFamily: return "QueueFamily";
+ case ScopeShaderCallKHR: return "ShaderCallKHR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* GroupOperationToString(GroupOperation value) {
+ switch (value) {
+ case GroupOperationReduce: return "Reduce";
+ case GroupOperationInclusiveScan: return "InclusiveScan";
+ case GroupOperationExclusiveScan: return "ExclusiveScan";
+ case GroupOperationClusteredReduce: return "ClusteredReduce";
+ case GroupOperationPartitionedReduceNV: return "PartitionedReduceNV";
+ case GroupOperationPartitionedInclusiveScanNV: return "PartitionedInclusiveScanNV";
+ case GroupOperationPartitionedExclusiveScanNV: return "PartitionedExclusiveScanNV";
+ default: return "Unknown";
+ }
+}
+
+inline const char* KernelEnqueueFlagsToString(KernelEnqueueFlags value) {
+ switch (value) {
+ case KernelEnqueueFlagsNoWait: return "NoWait";
+ case KernelEnqueueFlagsWaitKernel: return "WaitKernel";
+ case KernelEnqueueFlagsWaitWorkGroup: return "WaitWorkGroup";
+ default: return "Unknown";
+ }
+}
+
+inline const char* CapabilityToString(Capability value) {
+ switch (value) {
+ case CapabilityMatrix: return "Matrix";
+ case CapabilityShader: return "Shader";
+ case CapabilityGeometry: return "Geometry";
+ case CapabilityTessellation: return "Tessellation";
+ case CapabilityAddresses: return "Addresses";
+ case CapabilityLinkage: return "Linkage";
+ case CapabilityKernel: return "Kernel";
+ case CapabilityVector16: return "Vector16";
+ case CapabilityFloat16Buffer: return "Float16Buffer";
+ case CapabilityFloat16: return "Float16";
+ case CapabilityFloat64: return "Float64";
+ case CapabilityInt64: return "Int64";
+ case CapabilityInt64Atomics: return "Int64Atomics";
+ case CapabilityImageBasic: return "ImageBasic";
+ case CapabilityImageReadWrite: return "ImageReadWrite";
+ case CapabilityImageMipmap: return "ImageMipmap";
+ case CapabilityPipes: return "Pipes";
+ case CapabilityGroups: return "Groups";
+ case CapabilityDeviceEnqueue: return "DeviceEnqueue";
+ case CapabilityLiteralSampler: return "LiteralSampler";
+ case CapabilityAtomicStorage: return "AtomicStorage";
+ case CapabilityInt16: return "Int16";
+ case CapabilityTessellationPointSize: return "TessellationPointSize";
+ case CapabilityGeometryPointSize: return "GeometryPointSize";
+ case CapabilityImageGatherExtended: return "ImageGatherExtended";
+ case CapabilityStorageImageMultisample: return "StorageImageMultisample";
+ case CapabilityUniformBufferArrayDynamicIndexing: return "UniformBufferArrayDynamicIndexing";
+ case CapabilitySampledImageArrayDynamicIndexing: return "SampledImageArrayDynamicIndexing";
+ case CapabilityStorageBufferArrayDynamicIndexing: return "StorageBufferArrayDynamicIndexing";
+ case CapabilityStorageImageArrayDynamicIndexing: return "StorageImageArrayDynamicIndexing";
+ case CapabilityClipDistance: return "ClipDistance";
+ case CapabilityCullDistance: return "CullDistance";
+ case CapabilityImageCubeArray: return "ImageCubeArray";
+ case CapabilitySampleRateShading: return "SampleRateShading";
+ case CapabilityImageRect: return "ImageRect";
+ case CapabilitySampledRect: return "SampledRect";
+ case CapabilityGenericPointer: return "GenericPointer";
+ case CapabilityInt8: return "Int8";
+ case CapabilityInputAttachment: return "InputAttachment";
+ case CapabilitySparseResidency: return "SparseResidency";
+ case CapabilityMinLod: return "MinLod";
+ case CapabilitySampled1D: return "Sampled1D";
+ case CapabilityImage1D: return "Image1D";
+ case CapabilitySampledCubeArray: return "SampledCubeArray";
+ case CapabilitySampledBuffer: return "SampledBuffer";
+ case CapabilityImageBuffer: return "ImageBuffer";
+ case CapabilityImageMSArray: return "ImageMSArray";
+ case CapabilityStorageImageExtendedFormats: return "StorageImageExtendedFormats";
+ case CapabilityImageQuery: return "ImageQuery";
+ case CapabilityDerivativeControl: return "DerivativeControl";
+ case CapabilityInterpolationFunction: return "InterpolationFunction";
+ case CapabilityTransformFeedback: return "TransformFeedback";
+ case CapabilityGeometryStreams: return "GeometryStreams";
+ case CapabilityStorageImageReadWithoutFormat: return "StorageImageReadWithoutFormat";
+ case CapabilityStorageImageWriteWithoutFormat: return "StorageImageWriteWithoutFormat";
+ case CapabilityMultiViewport: return "MultiViewport";
+ case CapabilitySubgroupDispatch: return "SubgroupDispatch";
+ case CapabilityNamedBarrier: return "NamedBarrier";
+ case CapabilityPipeStorage: return "PipeStorage";
+ case CapabilityGroupNonUniform: return "GroupNonUniform";
+ case CapabilityGroupNonUniformVote: return "GroupNonUniformVote";
+ case CapabilityGroupNonUniformArithmetic: return "GroupNonUniformArithmetic";
+ case CapabilityGroupNonUniformBallot: return "GroupNonUniformBallot";
+ case CapabilityGroupNonUniformShuffle: return "GroupNonUniformShuffle";
+ case CapabilityGroupNonUniformShuffleRelative: return "GroupNonUniformShuffleRelative";
+ case CapabilityGroupNonUniformClustered: return "GroupNonUniformClustered";
+ case CapabilityGroupNonUniformQuad: return "GroupNonUniformQuad";
+ case CapabilityShaderLayer: return "ShaderLayer";
+ case CapabilityShaderViewportIndex: return "ShaderViewportIndex";
+ case CapabilityUniformDecoration: return "UniformDecoration";
+ case CapabilityCoreBuiltinsARM: return "CoreBuiltinsARM";
+ case CapabilityTileImageColorReadAccessEXT: return "TileImageColorReadAccessEXT";
+ case CapabilityTileImageDepthReadAccessEXT: return "TileImageDepthReadAccessEXT";
+ case CapabilityTileImageStencilReadAccessEXT: return "TileImageStencilReadAccessEXT";
+ case CapabilityTensorsARM: return "TensorsARM";
+ case CapabilityStorageTensorArrayDynamicIndexingARM: return "StorageTensorArrayDynamicIndexingARM";
+ case CapabilityStorageTensorArrayNonUniformIndexingARM: return "StorageTensorArrayNonUniformIndexingARM";
+ case CapabilityGraphARM: return "GraphARM";
+ case CapabilityCooperativeMatrixLayoutsARM: return "CooperativeMatrixLayoutsARM";
+ case CapabilityFloat8EXT: return "Float8EXT";
+ case CapabilityFloat8CooperativeMatrixEXT: return "Float8CooperativeMatrixEXT";
+ case CapabilityFragmentShadingRateKHR: return "FragmentShadingRateKHR";
+ case CapabilitySubgroupBallotKHR: return "SubgroupBallotKHR";
+ case CapabilityDrawParameters: return "DrawParameters";
+ case CapabilityWorkgroupMemoryExplicitLayoutKHR: return "WorkgroupMemoryExplicitLayoutKHR";
+ case CapabilityWorkgroupMemoryExplicitLayout8BitAccessKHR: return "WorkgroupMemoryExplicitLayout8BitAccessKHR";
+ case CapabilityWorkgroupMemoryExplicitLayout16BitAccessKHR: return "WorkgroupMemoryExplicitLayout16BitAccessKHR";
+ case CapabilitySubgroupVoteKHR: return "SubgroupVoteKHR";
+ case CapabilityStorageBuffer16BitAccess: return "StorageBuffer16BitAccess";
+ case CapabilityStorageUniform16: return "StorageUniform16";
+ case CapabilityStoragePushConstant16: return "StoragePushConstant16";
+ case CapabilityStorageInputOutput16: return "StorageInputOutput16";
+ case CapabilityDeviceGroup: return "DeviceGroup";
+ case CapabilityMultiView: return "MultiView";
+ case CapabilityVariablePointersStorageBuffer: return "VariablePointersStorageBuffer";
+ case CapabilityVariablePointers: return "VariablePointers";
+ case CapabilityAtomicStorageOps: return "AtomicStorageOps";
+ case CapabilitySampleMaskPostDepthCoverage: return "SampleMaskPostDepthCoverage";
+ case CapabilityStorageBuffer8BitAccess: return "StorageBuffer8BitAccess";
+ case CapabilityUniformAndStorageBuffer8BitAccess: return "UniformAndStorageBuffer8BitAccess";
+ case CapabilityStoragePushConstant8: return "StoragePushConstant8";
+ case CapabilityDenormPreserve: return "DenormPreserve";
+ case CapabilityDenormFlushToZero: return "DenormFlushToZero";
+ case CapabilitySignedZeroInfNanPreserve: return "SignedZeroInfNanPreserve";
+ case CapabilityRoundingModeRTE: return "RoundingModeRTE";
+ case CapabilityRoundingModeRTZ: return "RoundingModeRTZ";
+ case CapabilityRayQueryProvisionalKHR: return "RayQueryProvisionalKHR";
+ case CapabilityRayQueryKHR: return "RayQueryKHR";
+ case CapabilityUntypedPointersKHR: return "UntypedPointersKHR";
+ case CapabilityRayTraversalPrimitiveCullingKHR: return "RayTraversalPrimitiveCullingKHR";
+ case CapabilityRayTracingKHR: return "RayTracingKHR";
+ case CapabilityTextureSampleWeightedQCOM: return "TextureSampleWeightedQCOM";
+ case CapabilityTextureBoxFilterQCOM: return "TextureBoxFilterQCOM";
+ case CapabilityTextureBlockMatchQCOM: return "TextureBlockMatchQCOM";
+ case CapabilityTileShadingQCOM: return "TileShadingQCOM";
+ case CapabilityTextureBlockMatch2QCOM: return "TextureBlockMatch2QCOM";
+ case CapabilityFloat16ImageAMD: return "Float16ImageAMD";
+ case CapabilityImageGatherBiasLodAMD: return "ImageGatherBiasLodAMD";
+ case CapabilityFragmentMaskAMD: return "FragmentMaskAMD";
+ case CapabilityStencilExportEXT: return "StencilExportEXT";
+ case CapabilityImageReadWriteLodAMD: return "ImageReadWriteLodAMD";
+ case CapabilityInt64ImageEXT: return "Int64ImageEXT";
+ case CapabilityShaderClockKHR: return "ShaderClockKHR";
+ case CapabilityShaderEnqueueAMDX: return "ShaderEnqueueAMDX";
+ case CapabilityQuadControlKHR: return "QuadControlKHR";
+ case CapabilityInt4TypeINTEL: return "Int4TypeINTEL";
+ case CapabilityInt4CooperativeMatrixINTEL: return "Int4CooperativeMatrixINTEL";
+ case CapabilityBFloat16TypeKHR: return "BFloat16TypeKHR";
+ case CapabilityBFloat16DotProductKHR: return "BFloat16DotProductKHR";
+ case CapabilityBFloat16CooperativeMatrixKHR: return "BFloat16CooperativeMatrixKHR";
+ case CapabilitySampleMaskOverrideCoverageNV: return "SampleMaskOverrideCoverageNV";
+ case CapabilityGeometryShaderPassthroughNV: return "GeometryShaderPassthroughNV";
+ case CapabilityShaderViewportIndexLayerEXT: return "ShaderViewportIndexLayerEXT";
+ case CapabilityShaderViewportMaskNV: return "ShaderViewportMaskNV";
+ case CapabilityShaderStereoViewNV: return "ShaderStereoViewNV";
+ case CapabilityPerViewAttributesNV: return "PerViewAttributesNV";
+ case CapabilityFragmentFullyCoveredEXT: return "FragmentFullyCoveredEXT";
+ case CapabilityMeshShadingNV: return "MeshShadingNV";
+ case CapabilityImageFootprintNV: return "ImageFootprintNV";
+ case CapabilityMeshShadingEXT: return "MeshShadingEXT";
+ case CapabilityFragmentBarycentricKHR: return "FragmentBarycentricKHR";
+ case CapabilityComputeDerivativeGroupQuadsKHR: return "ComputeDerivativeGroupQuadsKHR";
+ case CapabilityFragmentDensityEXT: return "FragmentDensityEXT";
+ case CapabilityGroupNonUniformPartitionedNV: return "GroupNonUniformPartitionedNV";
+ case CapabilityShaderNonUniform: return "ShaderNonUniform";
+ case CapabilityRuntimeDescriptorArray: return "RuntimeDescriptorArray";
+ case CapabilityInputAttachmentArrayDynamicIndexing: return "InputAttachmentArrayDynamicIndexing";
+ case CapabilityUniformTexelBufferArrayDynamicIndexing: return "UniformTexelBufferArrayDynamicIndexing";
+ case CapabilityStorageTexelBufferArrayDynamicIndexing: return "StorageTexelBufferArrayDynamicIndexing";
+ case CapabilityUniformBufferArrayNonUniformIndexing: return "UniformBufferArrayNonUniformIndexing";
+ case CapabilitySampledImageArrayNonUniformIndexing: return "SampledImageArrayNonUniformIndexing";
+ case CapabilityStorageBufferArrayNonUniformIndexing: return "StorageBufferArrayNonUniformIndexing";
+ case CapabilityStorageImageArrayNonUniformIndexing: return "StorageImageArrayNonUniformIndexing";
+ case CapabilityInputAttachmentArrayNonUniformIndexing: return "InputAttachmentArrayNonUniformIndexing";
+ case CapabilityUniformTexelBufferArrayNonUniformIndexing: return "UniformTexelBufferArrayNonUniformIndexing";
+ case CapabilityStorageTexelBufferArrayNonUniformIndexing: return "StorageTexelBufferArrayNonUniformIndexing";
+ case CapabilityRayTracingPositionFetchKHR: return "RayTracingPositionFetchKHR";
+ case CapabilityRayTracingNV: return "RayTracingNV";
+ case CapabilityRayTracingMotionBlurNV: return "RayTracingMotionBlurNV";
+ case CapabilityVulkanMemoryModel: return "VulkanMemoryModel";
+ case CapabilityVulkanMemoryModelDeviceScope: return "VulkanMemoryModelDeviceScope";
+ case CapabilityPhysicalStorageBufferAddresses: return "PhysicalStorageBufferAddresses";
+ case CapabilityComputeDerivativeGroupLinearKHR: return "ComputeDerivativeGroupLinearKHR";
+ case CapabilityRayTracingProvisionalKHR: return "RayTracingProvisionalKHR";
+ case CapabilityCooperativeMatrixNV: return "CooperativeMatrixNV";
+ case CapabilityFragmentShaderSampleInterlockEXT: return "FragmentShaderSampleInterlockEXT";
+ case CapabilityFragmentShaderShadingRateInterlockEXT: return "FragmentShaderShadingRateInterlockEXT";
+ case CapabilityShaderSMBuiltinsNV: return "ShaderSMBuiltinsNV";
+ case CapabilityFragmentShaderPixelInterlockEXT: return "FragmentShaderPixelInterlockEXT";
+ case CapabilityDemoteToHelperInvocation: return "DemoteToHelperInvocation";
+ case CapabilityDisplacementMicromapNV: return "DisplacementMicromapNV";
+ case CapabilityRayTracingOpacityMicromapEXT: return "RayTracingOpacityMicromapEXT";
+ case CapabilityShaderInvocationReorderNV: return "ShaderInvocationReorderNV";
+ case CapabilityBindlessTextureNV: return "BindlessTextureNV";
+ case CapabilityRayQueryPositionFetchKHR: return "RayQueryPositionFetchKHR";
+ case CapabilityCooperativeVectorNV: return "CooperativeVectorNV";
+ case CapabilityAtomicFloat16VectorNV: return "AtomicFloat16VectorNV";
+ case CapabilityRayTracingDisplacementMicromapNV: return "RayTracingDisplacementMicromapNV";
+ case CapabilityRawAccessChainsNV: return "RawAccessChainsNV";
+ case CapabilityRayTracingSpheresGeometryNV: return "RayTracingSpheresGeometryNV";
+ case CapabilityRayTracingLinearSweptSpheresGeometryNV: return "RayTracingLinearSweptSpheresGeometryNV";
+ case CapabilityCooperativeMatrixReductionsNV: return "CooperativeMatrixReductionsNV";
+ case CapabilityCooperativeMatrixConversionsNV: return "CooperativeMatrixConversionsNV";
+ case CapabilityCooperativeMatrixPerElementOperationsNV: return "CooperativeMatrixPerElementOperationsNV";
+ case CapabilityCooperativeMatrixTensorAddressingNV: return "CooperativeMatrixTensorAddressingNV";
+ case CapabilityCooperativeMatrixBlockLoadsNV: return "CooperativeMatrixBlockLoadsNV";
+ case CapabilityCooperativeVectorTrainingNV: return "CooperativeVectorTrainingNV";
+ case CapabilityRayTracingClusterAccelerationStructureNV: return "RayTracingClusterAccelerationStructureNV";
+ case CapabilityTensorAddressingNV: return "TensorAddressingNV";
+ case CapabilitySubgroupShuffleINTEL: return "SubgroupShuffleINTEL";
+ case CapabilitySubgroupBufferBlockIOINTEL: return "SubgroupBufferBlockIOINTEL";
+ case CapabilitySubgroupImageBlockIOINTEL: return "SubgroupImageBlockIOINTEL";
+ case CapabilitySubgroupImageMediaBlockIOINTEL: return "SubgroupImageMediaBlockIOINTEL";
+ case CapabilityRoundToInfinityINTEL: return "RoundToInfinityINTEL";
+ case CapabilityFloatingPointModeINTEL: return "FloatingPointModeINTEL";
+ case CapabilityIntegerFunctions2INTEL: return "IntegerFunctions2INTEL";
+ case CapabilityFunctionPointersINTEL: return "FunctionPointersINTEL";
+ case CapabilityIndirectReferencesINTEL: return "IndirectReferencesINTEL";
+ case CapabilityAsmINTEL: return "AsmINTEL";
+ case CapabilityAtomicFloat32MinMaxEXT: return "AtomicFloat32MinMaxEXT";
+ case CapabilityAtomicFloat64MinMaxEXT: return "AtomicFloat64MinMaxEXT";
+ case CapabilityAtomicFloat16MinMaxEXT: return "AtomicFloat16MinMaxEXT";
+ case CapabilityVectorComputeINTEL: return "VectorComputeINTEL";
+ case CapabilityVectorAnyINTEL: return "VectorAnyINTEL";
+ case CapabilityExpectAssumeKHR: return "ExpectAssumeKHR";
+ case CapabilitySubgroupAvcMotionEstimationINTEL: return "SubgroupAvcMotionEstimationINTEL";
+ case CapabilitySubgroupAvcMotionEstimationIntraINTEL: return "SubgroupAvcMotionEstimationIntraINTEL";
+ case CapabilitySubgroupAvcMotionEstimationChromaINTEL: return "SubgroupAvcMotionEstimationChromaINTEL";
+ case CapabilityVariableLengthArrayINTEL: return "VariableLengthArrayINTEL";
+ case CapabilityFunctionFloatControlINTEL: return "FunctionFloatControlINTEL";
+ case CapabilityFPGAMemoryAttributesINTEL: return "FPGAMemoryAttributesINTEL";
+ case CapabilityFPFastMathModeINTEL: return "FPFastMathModeINTEL";
+ case CapabilityArbitraryPrecisionIntegersINTEL: return "ArbitraryPrecisionIntegersINTEL";
+ case CapabilityArbitraryPrecisionFloatingPointINTEL: return "ArbitraryPrecisionFloatingPointINTEL";
+ case CapabilityUnstructuredLoopControlsINTEL: return "UnstructuredLoopControlsINTEL";
+ case CapabilityFPGALoopControlsINTEL: return "FPGALoopControlsINTEL";
+ case CapabilityKernelAttributesINTEL: return "KernelAttributesINTEL";
+ case CapabilityFPGAKernelAttributesINTEL: return "FPGAKernelAttributesINTEL";
+ case CapabilityFPGAMemoryAccessesINTEL: return "FPGAMemoryAccessesINTEL";
+ case CapabilityFPGAClusterAttributesINTEL: return "FPGAClusterAttributesINTEL";
+ case CapabilityLoopFuseINTEL: return "LoopFuseINTEL";
+ case CapabilityFPGADSPControlINTEL: return "FPGADSPControlINTEL";
+ case CapabilityMemoryAccessAliasingINTEL: return "MemoryAccessAliasingINTEL";
+ case CapabilityFPGAInvocationPipeliningAttributesINTEL: return "FPGAInvocationPipeliningAttributesINTEL";
+ case CapabilityFPGABufferLocationINTEL: return "FPGABufferLocationINTEL";
+ case CapabilityArbitraryPrecisionFixedPointINTEL: return "ArbitraryPrecisionFixedPointINTEL";
+ case CapabilityUSMStorageClassesINTEL: return "USMStorageClassesINTEL";
+ case CapabilityRuntimeAlignedAttributeINTEL: return "RuntimeAlignedAttributeINTEL";
+ case CapabilityIOPipesINTEL: return "IOPipesINTEL";
+ case CapabilityBlockingPipesINTEL: return "BlockingPipesINTEL";
+ case CapabilityFPGARegINTEL: return "FPGARegINTEL";
+ case CapabilityDotProductInputAll: return "DotProductInputAll";
+ case CapabilityDotProductInput4x8Bit: return "DotProductInput4x8Bit";
+ case CapabilityDotProductInput4x8BitPacked: return "DotProductInput4x8BitPacked";
+ case CapabilityDotProduct: return "DotProduct";
+ case CapabilityRayCullMaskKHR: return "RayCullMaskKHR";
+ case CapabilityCooperativeMatrixKHR: return "CooperativeMatrixKHR";
+ case CapabilityReplicatedCompositesEXT: return "ReplicatedCompositesEXT";
+ case CapabilityBitInstructions: return "BitInstructions";
+ case CapabilityGroupNonUniformRotateKHR: return "GroupNonUniformRotateKHR";
+ case CapabilityFloatControls2: return "FloatControls2";
+ case CapabilityAtomicFloat32AddEXT: return "AtomicFloat32AddEXT";
+ case CapabilityAtomicFloat64AddEXT: return "AtomicFloat64AddEXT";
+ case CapabilityLongCompositesINTEL: return "LongCompositesINTEL";
+ case CapabilityOptNoneEXT: return "OptNoneEXT";
+ case CapabilityAtomicFloat16AddEXT: return "AtomicFloat16AddEXT";
+ case CapabilityDebugInfoModuleINTEL: return "DebugInfoModuleINTEL";
+ case CapabilityBFloat16ConversionINTEL: return "BFloat16ConversionINTEL";
+ case CapabilitySplitBarrierINTEL: return "SplitBarrierINTEL";
+ case CapabilityArithmeticFenceEXT: return "ArithmeticFenceEXT";
+ case CapabilityFPGAClusterAttributesV2INTEL: return "FPGAClusterAttributesV2INTEL";
+ case CapabilityFPGAKernelAttributesv2INTEL: return "FPGAKernelAttributesv2INTEL";
+ case CapabilityTaskSequenceINTEL: return "TaskSequenceINTEL";
+ case CapabilityFPMaxErrorINTEL: return "FPMaxErrorINTEL";
+ case CapabilityFPGALatencyControlINTEL: return "FPGALatencyControlINTEL";
+ case CapabilityFPGAArgumentInterfacesINTEL: return "FPGAArgumentInterfacesINTEL";
+ case CapabilityGlobalVariableHostAccessINTEL: return "GlobalVariableHostAccessINTEL";
+ case CapabilityGlobalVariableFPGADecorationsINTEL: return "GlobalVariableFPGADecorationsINTEL";
+ case CapabilitySubgroupBufferPrefetchINTEL: return "SubgroupBufferPrefetchINTEL";
+ case CapabilitySubgroup2DBlockIOINTEL: return "Subgroup2DBlockIOINTEL";
+ case CapabilitySubgroup2DBlockTransformINTEL: return "Subgroup2DBlockTransformINTEL";
+ case CapabilitySubgroup2DBlockTransposeINTEL: return "Subgroup2DBlockTransposeINTEL";
+ case CapabilitySubgroupMatrixMultiplyAccumulateINTEL: return "SubgroupMatrixMultiplyAccumulateINTEL";
+ case CapabilityTernaryBitwiseFunctionINTEL: return "TernaryBitwiseFunctionINTEL";
+ case CapabilityGroupUniformArithmeticKHR: return "GroupUniformArithmeticKHR";
+ case CapabilityTensorFloat32RoundingINTEL: return "TensorFloat32RoundingINTEL";
+ case CapabilityMaskedGatherScatterINTEL: return "MaskedGatherScatterINTEL";
+ case CapabilityCacheControlsINTEL: return "CacheControlsINTEL";
+ case CapabilityRegisterLimitsINTEL: return "RegisterLimitsINTEL";
+ case CapabilityBindlessImagesINTEL: return "BindlessImagesINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* RayQueryIntersectionToString(RayQueryIntersection value) {
+ switch (value) {
+ case RayQueryIntersectionRayQueryCandidateIntersectionKHR: return "RayQueryCandidateIntersectionKHR";
+ case RayQueryIntersectionRayQueryCommittedIntersectionKHR: return "RayQueryCommittedIntersectionKHR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* RayQueryCommittedIntersectionTypeToString(RayQueryCommittedIntersectionType value) {
+ switch (value) {
+ case RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionNoneKHR: return "RayQueryCommittedIntersectionNoneKHR";
+ case RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR: return "RayQueryCommittedIntersectionTriangleKHR";
+ case RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionGeneratedKHR: return "RayQueryCommittedIntersectionGeneratedKHR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* RayQueryCandidateIntersectionTypeToString(RayQueryCandidateIntersectionType value) {
+ switch (value) {
+ case RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionTriangleKHR: return "RayQueryCandidateIntersectionTriangleKHR";
+ case RayQueryCandidateIntersectionTypeRayQueryCandidateIntersectionAABBKHR: return "RayQueryCandidateIntersectionAABBKHR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* FPDenormModeToString(FPDenormMode value) {
+ switch (value) {
+ case FPDenormModePreserve: return "Preserve";
+ case FPDenormModeFlushToZero: return "FlushToZero";
+ default: return "Unknown";
+ }
+}
+
+inline const char* FPOperationModeToString(FPOperationMode value) {
+ switch (value) {
+ case FPOperationModeIEEE: return "IEEE";
+ case FPOperationModeALT: return "ALT";
+ default: return "Unknown";
+ }
+}
+
+inline const char* QuantizationModesToString(QuantizationModes value) {
+ switch (value) {
+ case QuantizationModesTRN: return "TRN";
+ case QuantizationModesTRN_ZERO: return "TRN_ZERO";
+ case QuantizationModesRND: return "RND";
+ case QuantizationModesRND_ZERO: return "RND_ZERO";
+ case QuantizationModesRND_INF: return "RND_INF";
+ case QuantizationModesRND_MIN_INF: return "RND_MIN_INF";
+ case QuantizationModesRND_CONV: return "RND_CONV";
+ case QuantizationModesRND_CONV_ODD: return "RND_CONV_ODD";
+ default: return "Unknown";
+ }
+}
+
+inline const char* OverflowModesToString(OverflowModes value) {
+ switch (value) {
+ case OverflowModesWRAP: return "WRAP";
+ case OverflowModesSAT: return "SAT";
+ case OverflowModesSAT_ZERO: return "SAT_ZERO";
+ case OverflowModesSAT_SYM: return "SAT_SYM";
+ default: return "Unknown";
+ }
+}
+
+inline const char* PackedVectorFormatToString(PackedVectorFormat value) {
+ switch (value) {
+ case PackedVectorFormatPackedVectorFormat4x8Bit: return "PackedVectorFormat4x8Bit";
+ default: return "Unknown";
+ }
+}
+
+inline const char* CooperativeMatrixLayoutToString(CooperativeMatrixLayout value) {
+ switch (value) {
+ case CooperativeMatrixLayoutRowMajorKHR: return "RowMajorKHR";
+ case CooperativeMatrixLayoutColumnMajorKHR: return "ColumnMajorKHR";
+ case CooperativeMatrixLayoutRowBlockedInterleavedARM: return "RowBlockedInterleavedARM";
+ case CooperativeMatrixLayoutColumnBlockedInterleavedARM: return "ColumnBlockedInterleavedARM";
+ default: return "Unknown";
+ }
+}
+
+inline const char* CooperativeMatrixUseToString(CooperativeMatrixUse value) {
+ switch (value) {
+ case CooperativeMatrixUseMatrixAKHR: return "MatrixAKHR";
+ case CooperativeMatrixUseMatrixBKHR: return "MatrixBKHR";
+ case CooperativeMatrixUseMatrixAccumulatorKHR: return "MatrixAccumulatorKHR";
+ default: return "Unknown";
+ }
+}
+
+inline const char* TensorClampModeToString(TensorClampMode value) {
+ switch (value) {
+ case TensorClampModeUndefined: return "Undefined";
+ case TensorClampModeConstant: return "Constant";
+ case TensorClampModeClampToEdge: return "ClampToEdge";
+ case TensorClampModeRepeat: return "Repeat";
+ case TensorClampModeRepeatMirrored: return "RepeatMirrored";
+ default: return "Unknown";
+ }
+}
+
+inline const char* InitializationModeQualifierToString(InitializationModeQualifier value) {
+ switch (value) {
+ case InitializationModeQualifierInitOnDeviceReprogramINTEL: return "InitOnDeviceReprogramINTEL";
+ case InitializationModeQualifierInitOnDeviceResetINTEL: return "InitOnDeviceResetINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* HostAccessQualifierToString(HostAccessQualifier value) {
+ switch (value) {
+ case HostAccessQualifierNoneINTEL: return "NoneINTEL";
+ case HostAccessQualifierReadINTEL: return "ReadINTEL";
+ case HostAccessQualifierWriteINTEL: return "WriteINTEL";
+ case HostAccessQualifierReadWriteINTEL: return "ReadWriteINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* LoadCacheControlToString(LoadCacheControl value) {
+ switch (value) {
+ case LoadCacheControlUncachedINTEL: return "UncachedINTEL";
+ case LoadCacheControlCachedINTEL: return "CachedINTEL";
+ case LoadCacheControlStreamingINTEL: return "StreamingINTEL";
+ case LoadCacheControlInvalidateAfterReadINTEL: return "InvalidateAfterReadINTEL";
+ case LoadCacheControlConstCachedINTEL: return "ConstCachedINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* StoreCacheControlToString(StoreCacheControl value) {
+ switch (value) {
+ case StoreCacheControlUncachedINTEL: return "UncachedINTEL";
+ case StoreCacheControlWriteThroughINTEL: return "WriteThroughINTEL";
+ case StoreCacheControlWriteBackINTEL: return "WriteBackINTEL";
+ case StoreCacheControlStreamingINTEL: return "StreamingINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* NamedMaximumNumberOfRegistersToString(NamedMaximumNumberOfRegisters value) {
+ switch (value) {
+ case NamedMaximumNumberOfRegistersAutoINTEL: return "AutoINTEL";
+ default: return "Unknown";
+ }
+}
+
+inline const char* FPEncodingToString(FPEncoding value) {
+ switch (value) {
+ case FPEncodingBFloat16KHR: return "BFloat16KHR";
+ case FPEncodingFloat8E4M3EXT: return "Float8E4M3EXT";
+ case FPEncodingFloat8E5M2EXT: return "Float8E5M2EXT";
+ default: return "Unknown";
+ }
+}
+
+inline const char* CooperativeVectorMatrixLayoutToString(CooperativeVectorMatrixLayout value) {
+ switch (value) {
+ case CooperativeVectorMatrixLayoutRowMajorNV: return "RowMajorNV";
+ case CooperativeVectorMatrixLayoutColumnMajorNV: return "ColumnMajorNV";
+ case CooperativeVectorMatrixLayoutInferencingOptimalNV: return "InferencingOptimalNV";
+ case CooperativeVectorMatrixLayoutTrainingOptimalNV: return "TrainingOptimalNV";
+ default: return "Unknown";
+ }
+}
+
+inline const char* ComponentTypeToString(ComponentType value) {
+ switch (value) {
+ case ComponentTypeFloat16NV: return "Float16NV";
+ case ComponentTypeFloat32NV: return "Float32NV";
+ case ComponentTypeFloat64NV: return "Float64NV";
+ case ComponentTypeSignedInt8NV: return "SignedInt8NV";
+ case ComponentTypeSignedInt16NV: return "SignedInt16NV";
+ case ComponentTypeSignedInt32NV: return "SignedInt32NV";
+ case ComponentTypeSignedInt64NV: return "SignedInt64NV";
+ case ComponentTypeUnsignedInt8NV: return "UnsignedInt8NV";
+ case ComponentTypeUnsignedInt16NV: return "UnsignedInt16NV";
+ case ComponentTypeUnsignedInt32NV: return "UnsignedInt32NV";
+ case ComponentTypeUnsignedInt64NV: return "UnsignedInt64NV";
+ case ComponentTypeSignedInt8PackedNV: return "SignedInt8PackedNV";
+ case ComponentTypeUnsignedInt8PackedNV: return "UnsignedInt8PackedNV";
+ case ComponentTypeFloatE4M3NV: return "FloatE4M3NV";
+ case ComponentTypeFloatE5M2NV: return "FloatE5M2NV";
+ default: return "Unknown";
+ }
+}
+
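+// Illustrative usage only (not part of the generated header): assuming the
+// usual `spv` namespace and a capability value `cap` already decoded from an
+// OpCapability instruction, a reflection pass could log it for debugging:
+//
+//   printf("SPIR-V module declares capability %s\n", spv::CapabilityToString(cap));
+//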
+inline const char* OpToString(Op value) {
+ switch (value) {
+ case OpNop: return "OpNop";
+ case OpUndef: return "OpUndef";
+ case OpSourceContinued: return "OpSourceContinued";
+ case OpSource: return "OpSource";
+ case OpSourceExtension: return "OpSourceExtension";
+ case OpName: return "OpName";
+ case OpMemberName: return "OpMemberName";
+ case OpString: return "OpString";
+ case OpLine: return "OpLine";
+ case OpExtension: return "OpExtension";
+ case OpExtInstImport: return "OpExtInstImport";
+ case OpExtInst: return "OpExtInst";
+ case OpMemoryModel: return "OpMemoryModel";
+ case OpEntryPoint: return "OpEntryPoint";
+ case OpExecutionMode: return "OpExecutionMode";
+ case OpCapability: return "OpCapability";
+ case OpTypeVoid: return "OpTypeVoid";
+ case OpTypeBool: return "OpTypeBool";
+ case OpTypeInt: return "OpTypeInt";
+ case OpTypeFloat: return "OpTypeFloat";
+ case OpTypeVector: return "OpTypeVector";
+ case OpTypeMatrix: return "OpTypeMatrix";
+ case OpTypeImage: return "OpTypeImage";
+ case OpTypeSampler: return "OpTypeSampler";
+ case OpTypeSampledImage: return "OpTypeSampledImage";
+ case OpTypeArray: return "OpTypeArray";
+ case OpTypeRuntimeArray: return "OpTypeRuntimeArray";
+ case OpTypeStruct: return "OpTypeStruct";
+ case OpTypeOpaque: return "OpTypeOpaque";
+ case OpTypePointer: return "OpTypePointer";
+ case OpTypeFunction: return "OpTypeFunction";
+ case OpTypeEvent: return "OpTypeEvent";
+ case OpTypeDeviceEvent: return "OpTypeDeviceEvent";
+ case OpTypeReserveId: return "OpTypeReserveId";
+ case OpTypeQueue: return "OpTypeQueue";
+ case OpTypePipe: return "OpTypePipe";
+ case OpTypeForwardPointer: return "OpTypeForwardPointer";
+ case OpConstantTrue: return "OpConstantTrue";
+ case OpConstantFalse: return "OpConstantFalse";
+ case OpConstant: return "OpConstant";
+ case OpConstantComposite: return "OpConstantComposite";
+ case OpConstantSampler: return "OpConstantSampler";
+ case OpConstantNull: return "OpConstantNull";
+ case OpSpecConstantTrue: return "OpSpecConstantTrue";
+ case OpSpecConstantFalse: return "OpSpecConstantFalse";
+ case OpSpecConstant: return "OpSpecConstant";
+ case OpSpecConstantComposite: return "OpSpecConstantComposite";
+ case OpSpecConstantOp: return "OpSpecConstantOp";
+ case OpFunction: return "OpFunction";
+ case OpFunctionParameter: return "OpFunctionParameter";
+ case OpFunctionEnd: return "OpFunctionEnd";
+ case OpFunctionCall: return "OpFunctionCall";
+ case OpVariable: return "OpVariable";
+ case OpImageTexelPointer: return "OpImageTexelPointer";
+ case OpLoad: return "OpLoad";
+ case OpStore: return "OpStore";
+ case OpCopyMemory: return "OpCopyMemory";
+ case OpCopyMemorySized: return "OpCopyMemorySized";
+ case OpAccessChain: return "OpAccessChain";
+ case OpInBoundsAccessChain: return "OpInBoundsAccessChain";
+ case OpPtrAccessChain: return "OpPtrAccessChain";
+ case OpArrayLength: return "OpArrayLength";
+ case OpGenericPtrMemSemantics: return "OpGenericPtrMemSemantics";
+ case OpInBoundsPtrAccessChain: return "OpInBoundsPtrAccessChain";
+ case OpDecorate: return "OpDecorate";
+ case OpMemberDecorate: return "OpMemberDecorate";
+ case OpDecorationGroup: return "OpDecorationGroup";
+ case OpGroupDecorate: return "OpGroupDecorate";
+ case OpGroupMemberDecorate: return "OpGroupMemberDecorate";
+ case OpVectorExtractDynamic: return "OpVectorExtractDynamic";
+ case OpVectorInsertDynamic: return "OpVectorInsertDynamic";
+ case OpVectorShuffle: return "OpVectorShuffle";
+ case OpCompositeConstruct: return "OpCompositeConstruct";
+ case OpCompositeExtract: return "OpCompositeExtract";
+ case OpCompositeInsert: return "OpCompositeInsert";
+ case OpCopyObject: return "OpCopyObject";
+ case OpTranspose: return "OpTranspose";
+ case OpSampledImage: return "OpSampledImage";
+ case OpImageSampleImplicitLod: return "OpImageSampleImplicitLod";
+ case OpImageSampleExplicitLod: return "OpImageSampleExplicitLod";
+ case OpImageSampleDrefImplicitLod: return "OpImageSampleDrefImplicitLod";
+ case OpImageSampleDrefExplicitLod: return "OpImageSampleDrefExplicitLod";
+ case OpImageSampleProjImplicitLod: return "OpImageSampleProjImplicitLod";
+ case OpImageSampleProjExplicitLod: return "OpImageSampleProjExplicitLod";
+ case OpImageSampleProjDrefImplicitLod: return "OpImageSampleProjDrefImplicitLod";
+ case OpImageSampleProjDrefExplicitLod: return "OpImageSampleProjDrefExplicitLod";
+ case OpImageFetch: return "OpImageFetch";
+ case OpImageGather: return "OpImageGather";
+ case OpImageDrefGather: return "OpImageDrefGather";
+ case OpImageRead: return "OpImageRead";
+ case OpImageWrite: return "OpImageWrite";
+ case OpImage: return "OpImage";
+ case OpImageQueryFormat: return "OpImageQueryFormat";
+ case OpImageQueryOrder: return "OpImageQueryOrder";
+ case OpImageQuerySizeLod: return "OpImageQuerySizeLod";
+ case OpImageQuerySize: return "OpImageQuerySize";
+ case OpImageQueryLod: return "OpImageQueryLod";
+ case OpImageQueryLevels: return "OpImageQueryLevels";
+ case OpImageQuerySamples: return "OpImageQuerySamples";
+ case OpConvertFToU: return "OpConvertFToU";
+ case OpConvertFToS: return "OpConvertFToS";
+ case OpConvertSToF: return "OpConvertSToF";
+ case OpConvertUToF: return "OpConvertUToF";
+ case OpUConvert: return "OpUConvert";
+ case OpSConvert: return "OpSConvert";
+ case OpFConvert: return "OpFConvert";
+ case OpQuantizeToF16: return "OpQuantizeToF16";
+ case OpConvertPtrToU: return "OpConvertPtrToU";
+ case OpSatConvertSToU: return "OpSatConvertSToU";
+ case OpSatConvertUToS: return "OpSatConvertUToS";
+ case OpConvertUToPtr: return "OpConvertUToPtr";
+ case OpPtrCastToGeneric: return "OpPtrCastToGeneric";
+ case OpGenericCastToPtr: return "OpGenericCastToPtr";
+ case OpGenericCastToPtrExplicit: return "OpGenericCastToPtrExplicit";
+ case OpBitcast: return "OpBitcast";
+ case OpSNegate: return "OpSNegate";
+ case OpFNegate: return "OpFNegate";
+ case OpIAdd: return "OpIAdd";
+ case OpFAdd: return "OpFAdd";
+ case OpISub: return "OpISub";
+ case OpFSub: return "OpFSub";
+ case OpIMul: return "OpIMul";
+ case OpFMul: return "OpFMul";
+ case OpUDiv: return "OpUDiv";
+ case OpSDiv: return "OpSDiv";
+ case OpFDiv: return "OpFDiv";
+ case OpUMod: return "OpUMod";
+ case OpSRem: return "OpSRem";
+ case OpSMod: return "OpSMod";
+ case OpFRem: return "OpFRem";
+ case OpFMod: return "OpFMod";
+ case OpVectorTimesScalar: return "OpVectorTimesScalar";
+ case OpMatrixTimesScalar: return "OpMatrixTimesScalar";
+ case OpVectorTimesMatrix: return "OpVectorTimesMatrix";
+ case OpMatrixTimesVector: return "OpMatrixTimesVector";
+ case OpMatrixTimesMatrix: return "OpMatrixTimesMatrix";
+ case OpOuterProduct: return "OpOuterProduct";
+ case OpDot: return "OpDot";
+ case OpIAddCarry: return "OpIAddCarry";
+ case OpISubBorrow: return "OpISubBorrow";
+ case OpUMulExtended: return "OpUMulExtended";
+ case OpSMulExtended: return "OpSMulExtended";
+ case OpAny: return "OpAny";
+ case OpAll: return "OpAll";
+ case OpIsNan: return "OpIsNan";
+ case OpIsInf: return "OpIsInf";
+ case OpIsFinite: return "OpIsFinite";
+ case OpIsNormal: return "OpIsNormal";
+ case OpSignBitSet: return "OpSignBitSet";
+ case OpLessOrGreater: return "OpLessOrGreater";
+ case OpOrdered: return "OpOrdered";
+ case OpUnordered: return "OpUnordered";
+ case OpLogicalEqual: return "OpLogicalEqual";
+ case OpLogicalNotEqual: return "OpLogicalNotEqual";
+ case OpLogicalOr: return "OpLogicalOr";
+ case OpLogicalAnd: return "OpLogicalAnd";
+ case OpLogicalNot: return "OpLogicalNot";
+ case OpSelect: return "OpSelect";
+ case OpIEqual: return "OpIEqual";
+ case OpINotEqual: return "OpINotEqual";
+ case OpUGreaterThan: return "OpUGreaterThan";
+ case OpSGreaterThan: return "OpSGreaterThan";
+ case OpUGreaterThanEqual: return "OpUGreaterThanEqual";
+ case OpSGreaterThanEqual: return "OpSGreaterThanEqual";
+ case OpULessThan: return "OpULessThan";
+ case OpSLessThan: return "OpSLessThan";
+ case OpULessThanEqual: return "OpULessThanEqual";
+ case OpSLessThanEqual: return "OpSLessThanEqual";
+ case OpFOrdEqual: return "OpFOrdEqual";
+ case OpFUnordEqual: return "OpFUnordEqual";
+ case OpFOrdNotEqual: return "OpFOrdNotEqual";
+ case OpFUnordNotEqual: return "OpFUnordNotEqual";
+ case OpFOrdLessThan: return "OpFOrdLessThan";
+ case OpFUnordLessThan: return "OpFUnordLessThan";
+ case OpFOrdGreaterThan: return "OpFOrdGreaterThan";
+ case OpFUnordGreaterThan: return "OpFUnordGreaterThan";
+ case OpFOrdLessThanEqual: return "OpFOrdLessThanEqual";
+ case OpFUnordLessThanEqual: return "OpFUnordLessThanEqual";
+ case OpFOrdGreaterThanEqual: return "OpFOrdGreaterThanEqual";
+ case OpFUnordGreaterThanEqual: return "OpFUnordGreaterThanEqual";
+ case OpShiftRightLogical: return "OpShiftRightLogical";
+ case OpShiftRightArithmetic: return "OpShiftRightArithmetic";
+ case OpShiftLeftLogical: return "OpShiftLeftLogical";
+ case OpBitwiseOr: return "OpBitwiseOr";
+ case OpBitwiseXor: return "OpBitwiseXor";
+ case OpBitwiseAnd: return "OpBitwiseAnd";
+ case OpNot: return "OpNot";
+ case OpBitFieldInsert: return "OpBitFieldInsert";
+ case OpBitFieldSExtract: return "OpBitFieldSExtract";
+ case OpBitFieldUExtract: return "OpBitFieldUExtract";
+ case OpBitReverse: return "OpBitReverse";
+ case OpBitCount: return "OpBitCount";
+ case OpDPdx: return "OpDPdx";
+ case OpDPdy: return "OpDPdy";
+ case OpFwidth: return "OpFwidth";
+ case OpDPdxFine: return "OpDPdxFine";
+ case OpDPdyFine: return "OpDPdyFine";
+ case OpFwidthFine: return "OpFwidthFine";
+ case OpDPdxCoarse: return "OpDPdxCoarse";
+ case OpDPdyCoarse: return "OpDPdyCoarse";
+ case OpFwidthCoarse: return "OpFwidthCoarse";
+ case OpEmitVertex: return "OpEmitVertex";
+ case OpEndPrimitive: return "OpEndPrimitive";
+ case OpEmitStreamVertex: return "OpEmitStreamVertex";
+ case OpEndStreamPrimitive: return "OpEndStreamPrimitive";
+ case OpControlBarrier: return "OpControlBarrier";
+ case OpMemoryBarrier: return "OpMemoryBarrier";
+ case OpAtomicLoad: return "OpAtomicLoad";
+ case OpAtomicStore: return "OpAtomicStore";
+ case OpAtomicExchange: return "OpAtomicExchange";
+ case OpAtomicCompareExchange: return "OpAtomicCompareExchange";
+ case OpAtomicCompareExchangeWeak: return "OpAtomicCompareExchangeWeak";
+ case OpAtomicIIncrement: return "OpAtomicIIncrement";
+ case OpAtomicIDecrement: return "OpAtomicIDecrement";
+ case OpAtomicIAdd: return "OpAtomicIAdd";
+ case OpAtomicISub: return "OpAtomicISub";
+ case OpAtomicSMin: return "OpAtomicSMin";
+ case OpAtomicUMin: return "OpAtomicUMin";
+ case OpAtomicSMax: return "OpAtomicSMax";
+ case OpAtomicUMax: return "OpAtomicUMax";
+ case OpAtomicAnd: return "OpAtomicAnd";
+ case OpAtomicOr: return "OpAtomicOr";
+ case OpAtomicXor: return "OpAtomicXor";
+ case OpPhi: return "OpPhi";
+ case OpLoopMerge: return "OpLoopMerge";
+ case OpSelectionMerge: return "OpSelectionMerge";
+ case OpLabel: return "OpLabel";
+ case OpBranch: return "OpBranch";
+ case OpBranchConditional: return "OpBranchConditional";
+ case OpSwitch: return "OpSwitch";
+ case OpKill: return "OpKill";
+ case OpReturn: return "OpReturn";
+ case OpReturnValue: return "OpReturnValue";
+ case OpUnreachable: return "OpUnreachable";
+ case OpLifetimeStart: return "OpLifetimeStart";
+ case OpLifetimeStop: return "OpLifetimeStop";
+ case OpGroupAsyncCopy: return "OpGroupAsyncCopy";
+ case OpGroupWaitEvents: return "OpGroupWaitEvents";
+ case OpGroupAll: return "OpGroupAll";
+ case OpGroupAny: return "OpGroupAny";
+ case OpGroupBroadcast: return "OpGroupBroadcast";
+ case OpGroupIAdd: return "OpGroupIAdd";
+ case OpGroupFAdd: return "OpGroupFAdd";
+ case OpGroupFMin: return "OpGroupFMin";
+ case OpGroupUMin: return "OpGroupUMin";
+ case OpGroupSMin: return "OpGroupSMin";
+ case OpGroupFMax: return "OpGroupFMax";
+ case OpGroupUMax: return "OpGroupUMax";
+ case OpGroupSMax: return "OpGroupSMax";
+ case OpReadPipe: return "OpReadPipe";
+ case OpWritePipe: return "OpWritePipe";
+ case OpReservedReadPipe: return "OpReservedReadPipe";
+ case OpReservedWritePipe: return "OpReservedWritePipe";
+ case OpReserveReadPipePackets: return "OpReserveReadPipePackets";
+ case OpReserveWritePipePackets: return "OpReserveWritePipePackets";
+ case OpCommitReadPipe: return "OpCommitReadPipe";
+ case OpCommitWritePipe: return "OpCommitWritePipe";
+ case OpIsValidReserveId: return "OpIsValidReserveId";
+ case OpGetNumPipePackets: return "OpGetNumPipePackets";
+ case OpGetMaxPipePackets: return "OpGetMaxPipePackets";
+ case OpGroupReserveReadPipePackets: return "OpGroupReserveReadPipePackets";
+ case OpGroupReserveWritePipePackets: return "OpGroupReserveWritePipePackets";
+ case OpGroupCommitReadPipe: return "OpGroupCommitReadPipe";
+ case OpGroupCommitWritePipe: return "OpGroupCommitWritePipe";
+ case OpEnqueueMarker: return "OpEnqueueMarker";
+ case OpEnqueueKernel: return "OpEnqueueKernel";
+ case OpGetKernelNDrangeSubGroupCount: return "OpGetKernelNDrangeSubGroupCount";
+ case OpGetKernelNDrangeMaxSubGroupSize: return "OpGetKernelNDrangeMaxSubGroupSize";
+ case OpGetKernelWorkGroupSize: return "OpGetKernelWorkGroupSize";
+ case OpGetKernelPreferredWorkGroupSizeMultiple: return "OpGetKernelPreferredWorkGroupSizeMultiple";
+ case OpRetainEvent: return "OpRetainEvent";
+ case OpReleaseEvent: return "OpReleaseEvent";
+ case OpCreateUserEvent: return "OpCreateUserEvent";
+ case OpIsValidEvent: return "OpIsValidEvent";
+ case OpSetUserEventStatus: return "OpSetUserEventStatus";
+ case OpCaptureEventProfilingInfo: return "OpCaptureEventProfilingInfo";
+ case OpGetDefaultQueue: return "OpGetDefaultQueue";
+ case OpBuildNDRange: return "OpBuildNDRange";
+ case OpImageSparseSampleImplicitLod: return "OpImageSparseSampleImplicitLod";
+ case OpImageSparseSampleExplicitLod: return "OpImageSparseSampleExplicitLod";
+ case OpImageSparseSampleDrefImplicitLod: return "OpImageSparseSampleDrefImplicitLod";
+ case OpImageSparseSampleDrefExplicitLod: return "OpImageSparseSampleDrefExplicitLod";
+ case OpImageSparseSampleProjImplicitLod: return "OpImageSparseSampleProjImplicitLod";
+ case OpImageSparseSampleProjExplicitLod: return "OpImageSparseSampleProjExplicitLod";
+ case OpImageSparseSampleProjDrefImplicitLod: return "OpImageSparseSampleProjDrefImplicitLod";
+ case OpImageSparseSampleProjDrefExplicitLod: return "OpImageSparseSampleProjDrefExplicitLod";
+ case OpImageSparseFetch: return "OpImageSparseFetch";
+ case OpImageSparseGather: return "OpImageSparseGather";
+ case OpImageSparseDrefGather: return "OpImageSparseDrefGather";
+ case OpImageSparseTexelsResident: return "OpImageSparseTexelsResident";
+ case OpNoLine: return "OpNoLine";
+ case OpAtomicFlagTestAndSet: return "OpAtomicFlagTestAndSet";
+ case OpAtomicFlagClear: return "OpAtomicFlagClear";
+ case OpImageSparseRead: return "OpImageSparseRead";
+ case OpSizeOf: return "OpSizeOf";
+ case OpTypePipeStorage: return "OpTypePipeStorage";
+ case OpConstantPipeStorage: return "OpConstantPipeStorage";
+ case OpCreatePipeFromPipeStorage: return "OpCreatePipeFromPipeStorage";
+ case OpGetKernelLocalSizeForSubgroupCount: return "OpGetKernelLocalSizeForSubgroupCount";
+ case OpGetKernelMaxNumSubgroups: return "OpGetKernelMaxNumSubgroups";
+ case OpTypeNamedBarrier: return "OpTypeNamedBarrier";
+ case OpNamedBarrierInitialize: return "OpNamedBarrierInitialize";
+ case OpMemoryNamedBarrier: return "OpMemoryNamedBarrier";
+ case OpModuleProcessed: return "OpModuleProcessed";
+ case OpExecutionModeId: return "OpExecutionModeId";
+ case OpDecorateId: return "OpDecorateId";
+ case OpGroupNonUniformElect: return "OpGroupNonUniformElect";
+ case OpGroupNonUniformAll: return "OpGroupNonUniformAll";
+ case OpGroupNonUniformAny: return "OpGroupNonUniformAny";
+ case OpGroupNonUniformAllEqual: return "OpGroupNonUniformAllEqual";
+ case OpGroupNonUniformBroadcast: return "OpGroupNonUniformBroadcast";
+ case OpGroupNonUniformBroadcastFirst: return "OpGroupNonUniformBroadcastFirst";
+ case OpGroupNonUniformBallot: return "OpGroupNonUniformBallot";
+ case OpGroupNonUniformInverseBallot: return "OpGroupNonUniformInverseBallot";
+ case OpGroupNonUniformBallotBitExtract: return "OpGroupNonUniformBallotBitExtract";
+ case OpGroupNonUniformBallotBitCount: return "OpGroupNonUniformBallotBitCount";
+ case OpGroupNonUniformBallotFindLSB: return "OpGroupNonUniformBallotFindLSB";
+ case OpGroupNonUniformBallotFindMSB: return "OpGroupNonUniformBallotFindMSB";
+ case OpGroupNonUniformShuffle: return "OpGroupNonUniformShuffle";
+ case OpGroupNonUniformShuffleXor: return "OpGroupNonUniformShuffleXor";
+ case OpGroupNonUniformShuffleUp: return "OpGroupNonUniformShuffleUp";
+ case OpGroupNonUniformShuffleDown: return "OpGroupNonUniformShuffleDown";
+ case OpGroupNonUniformIAdd: return "OpGroupNonUniformIAdd";
+ case OpGroupNonUniformFAdd: return "OpGroupNonUniformFAdd";
+ case OpGroupNonUniformIMul: return "OpGroupNonUniformIMul";
+ case OpGroupNonUniformFMul: return "OpGroupNonUniformFMul";
+ case OpGroupNonUniformSMin: return "OpGroupNonUniformSMin";
+ case OpGroupNonUniformUMin: return "OpGroupNonUniformUMin";
+ case OpGroupNonUniformFMin: return "OpGroupNonUniformFMin";
+ case OpGroupNonUniformSMax: return "OpGroupNonUniformSMax";
+ case OpGroupNonUniformUMax: return "OpGroupNonUniformUMax";
+ case OpGroupNonUniformFMax: return "OpGroupNonUniformFMax";
+ case OpGroupNonUniformBitwiseAnd: return "OpGroupNonUniformBitwiseAnd";
+ case OpGroupNonUniformBitwiseOr: return "OpGroupNonUniformBitwiseOr";
+ case OpGroupNonUniformBitwiseXor: return "OpGroupNonUniformBitwiseXor";
+ case OpGroupNonUniformLogicalAnd: return "OpGroupNonUniformLogicalAnd";
+ case OpGroupNonUniformLogicalOr: return "OpGroupNonUniformLogicalOr";
+ case OpGroupNonUniformLogicalXor: return "OpGroupNonUniformLogicalXor";
+ case OpGroupNonUniformQuadBroadcast: return "OpGroupNonUniformQuadBroadcast";
+ case OpGroupNonUniformQuadSwap: return "OpGroupNonUniformQuadSwap";
+ case OpCopyLogical: return "OpCopyLogical";
+ case OpPtrEqual: return "OpPtrEqual";
+ case OpPtrNotEqual: return "OpPtrNotEqual";
+ case OpPtrDiff: return "OpPtrDiff";
+ case OpColorAttachmentReadEXT: return "OpColorAttachmentReadEXT";
+ case OpDepthAttachmentReadEXT: return "OpDepthAttachmentReadEXT";
+ case OpStencilAttachmentReadEXT: return "OpStencilAttachmentReadEXT";
+ case OpTypeTensorARM: return "OpTypeTensorARM";
+ case OpTensorReadARM: return "OpTensorReadARM";
+ case OpTensorWriteARM: return "OpTensorWriteARM";
+ case OpTensorQuerySizeARM: return "OpTensorQuerySizeARM";
+ case OpGraphConstantARM: return "OpGraphConstantARM";
+ case OpGraphEntryPointARM: return "OpGraphEntryPointARM";
+ case OpGraphARM: return "OpGraphARM";
+ case OpGraphInputARM: return "OpGraphInputARM";
+ case OpGraphSetOutputARM: return "OpGraphSetOutputARM";
+ case OpGraphEndARM: return "OpGraphEndARM";
+ case OpTypeGraphARM: return "OpTypeGraphARM";
+ case OpTerminateInvocation: return "OpTerminateInvocation";
+ case OpTypeUntypedPointerKHR: return "OpTypeUntypedPointerKHR";
+ case OpUntypedVariableKHR: return "OpUntypedVariableKHR";
+ case OpUntypedAccessChainKHR: return "OpUntypedAccessChainKHR";
+ case OpUntypedInBoundsAccessChainKHR: return "OpUntypedInBoundsAccessChainKHR";
+ case OpSubgroupBallotKHR: return "OpSubgroupBallotKHR";
+ case OpSubgroupFirstInvocationKHR: return "OpSubgroupFirstInvocationKHR";
+ case OpUntypedPtrAccessChainKHR: return "OpUntypedPtrAccessChainKHR";
+ case OpUntypedInBoundsPtrAccessChainKHR: return "OpUntypedInBoundsPtrAccessChainKHR";
+ case OpUntypedArrayLengthKHR: return "OpUntypedArrayLengthKHR";
+ case OpUntypedPrefetchKHR: return "OpUntypedPrefetchKHR";
+ case OpSubgroupAllKHR: return "OpSubgroupAllKHR";
+ case OpSubgroupAnyKHR: return "OpSubgroupAnyKHR";
+ case OpSubgroupAllEqualKHR: return "OpSubgroupAllEqualKHR";
+ case OpGroupNonUniformRotateKHR: return "OpGroupNonUniformRotateKHR";
+ case OpSubgroupReadInvocationKHR: return "OpSubgroupReadInvocationKHR";
+ case OpExtInstWithForwardRefsKHR: return "OpExtInstWithForwardRefsKHR";
+ case OpTraceRayKHR: return "OpTraceRayKHR";
+ case OpExecuteCallableKHR: return "OpExecuteCallableKHR";
+ case OpConvertUToAccelerationStructureKHR: return "OpConvertUToAccelerationStructureKHR";
+ case OpIgnoreIntersectionKHR: return "OpIgnoreIntersectionKHR";
+ case OpTerminateRayKHR: return "OpTerminateRayKHR";
+ case OpSDot: return "OpSDot";
+ case OpUDot: return "OpUDot";
+ case OpSUDot: return "OpSUDot";
+ case OpSDotAccSat: return "OpSDotAccSat";
+ case OpUDotAccSat: return "OpUDotAccSat";
+ case OpSUDotAccSat: return "OpSUDotAccSat";
+ case OpTypeCooperativeMatrixKHR: return "OpTypeCooperativeMatrixKHR";
+ case OpCooperativeMatrixLoadKHR: return "OpCooperativeMatrixLoadKHR";
+ case OpCooperativeMatrixStoreKHR: return "OpCooperativeMatrixStoreKHR";
+ case OpCooperativeMatrixMulAddKHR: return "OpCooperativeMatrixMulAddKHR";
+ case OpCooperativeMatrixLengthKHR: return "OpCooperativeMatrixLengthKHR";
+ case OpConstantCompositeReplicateEXT: return "OpConstantCompositeReplicateEXT";
+ case OpSpecConstantCompositeReplicateEXT: return "OpSpecConstantCompositeReplicateEXT";
+ case OpCompositeConstructReplicateEXT: return "OpCompositeConstructReplicateEXT";
+ case OpTypeRayQueryKHR: return "OpTypeRayQueryKHR";
+ case OpRayQueryInitializeKHR: return "OpRayQueryInitializeKHR";
+ case OpRayQueryTerminateKHR: return "OpRayQueryTerminateKHR";
+ case OpRayQueryGenerateIntersectionKHR: return "OpRayQueryGenerateIntersectionKHR";
+ case OpRayQueryConfirmIntersectionKHR: return "OpRayQueryConfirmIntersectionKHR";
+ case OpRayQueryProceedKHR: return "OpRayQueryProceedKHR";
+ case OpRayQueryGetIntersectionTypeKHR: return "OpRayQueryGetIntersectionTypeKHR";
+ case OpImageSampleWeightedQCOM: return "OpImageSampleWeightedQCOM";
+ case OpImageBoxFilterQCOM: return "OpImageBoxFilterQCOM";
+ case OpImageBlockMatchSSDQCOM: return "OpImageBlockMatchSSDQCOM";
+ case OpImageBlockMatchSADQCOM: return "OpImageBlockMatchSADQCOM";
+ case OpImageBlockMatchWindowSSDQCOM: return "OpImageBlockMatchWindowSSDQCOM";
+ case OpImageBlockMatchWindowSADQCOM: return "OpImageBlockMatchWindowSADQCOM";
+ case OpImageBlockMatchGatherSSDQCOM: return "OpImageBlockMatchGatherSSDQCOM";
+ case OpImageBlockMatchGatherSADQCOM: return "OpImageBlockMatchGatherSADQCOM";
+ case OpGroupIAddNonUniformAMD: return "OpGroupIAddNonUniformAMD";
+ case OpGroupFAddNonUniformAMD: return "OpGroupFAddNonUniformAMD";
+ case OpGroupFMinNonUniformAMD: return "OpGroupFMinNonUniformAMD";
+ case OpGroupUMinNonUniformAMD: return "OpGroupUMinNonUniformAMD";
+ case OpGroupSMinNonUniformAMD: return "OpGroupSMinNonUniformAMD";
+ case OpGroupFMaxNonUniformAMD: return "OpGroupFMaxNonUniformAMD";
+ case OpGroupUMaxNonUniformAMD: return "OpGroupUMaxNonUniformAMD";
+ case OpGroupSMaxNonUniformAMD: return "OpGroupSMaxNonUniformAMD";
+ case OpFragmentMaskFetchAMD: return "OpFragmentMaskFetchAMD";
+ case OpFragmentFetchAMD: return "OpFragmentFetchAMD";
+ case OpReadClockKHR: return "OpReadClockKHR";
+ case OpAllocateNodePayloadsAMDX: return "OpAllocateNodePayloadsAMDX";
+ case OpEnqueueNodePayloadsAMDX: return "OpEnqueueNodePayloadsAMDX";
+ case OpTypeNodePayloadArrayAMDX: return "OpTypeNodePayloadArrayAMDX";
+ case OpFinishWritingNodePayloadAMDX: return "OpFinishWritingNodePayloadAMDX";
+ case OpNodePayloadArrayLengthAMDX: return "OpNodePayloadArrayLengthAMDX";
+ case OpIsNodePayloadValidAMDX: return "OpIsNodePayloadValidAMDX";
+ case OpConstantStringAMDX: return "OpConstantStringAMDX";
+ case OpSpecConstantStringAMDX: return "OpSpecConstantStringAMDX";
+ case OpGroupNonUniformQuadAllKHR: return "OpGroupNonUniformQuadAllKHR";
+ case OpGroupNonUniformQuadAnyKHR: return "OpGroupNonUniformQuadAnyKHR";
+ case OpHitObjectRecordHitMotionNV: return "OpHitObjectRecordHitMotionNV";
+ case OpHitObjectRecordHitWithIndexMotionNV: return "OpHitObjectRecordHitWithIndexMotionNV";
+ case OpHitObjectRecordMissMotionNV: return "OpHitObjectRecordMissMotionNV";
+ case OpHitObjectGetWorldToObjectNV: return "OpHitObjectGetWorldToObjectNV";
+ case OpHitObjectGetObjectToWorldNV: return "OpHitObjectGetObjectToWorldNV";
+ case OpHitObjectGetObjectRayDirectionNV: return "OpHitObjectGetObjectRayDirectionNV";
+ case OpHitObjectGetObjectRayOriginNV: return "OpHitObjectGetObjectRayOriginNV";
+ case OpHitObjectTraceRayMotionNV: return "OpHitObjectTraceRayMotionNV";
+ case OpHitObjectGetShaderRecordBufferHandleNV: return "OpHitObjectGetShaderRecordBufferHandleNV";
+ case OpHitObjectGetShaderBindingTableRecordIndexNV: return "OpHitObjectGetShaderBindingTableRecordIndexNV";
+ case OpHitObjectRecordEmptyNV: return "OpHitObjectRecordEmptyNV";
+ case OpHitObjectTraceRayNV: return "OpHitObjectTraceRayNV";
+ case OpHitObjectRecordHitNV: return "OpHitObjectRecordHitNV";
+ case OpHitObjectRecordHitWithIndexNV: return "OpHitObjectRecordHitWithIndexNV";
+ case OpHitObjectRecordMissNV: return "OpHitObjectRecordMissNV";
+ case OpHitObjectExecuteShaderNV: return "OpHitObjectExecuteShaderNV";
+ case OpHitObjectGetCurrentTimeNV: return "OpHitObjectGetCurrentTimeNV";
+ case OpHitObjectGetAttributesNV: return "OpHitObjectGetAttributesNV";
+ case OpHitObjectGetHitKindNV: return "OpHitObjectGetHitKindNV";
+ case OpHitObjectGetPrimitiveIndexNV: return "OpHitObjectGetPrimitiveIndexNV";
+ case OpHitObjectGetGeometryIndexNV: return "OpHitObjectGetGeometryIndexNV";
+ case OpHitObjectGetInstanceIdNV: return "OpHitObjectGetInstanceIdNV";
+ case OpHitObjectGetInstanceCustomIndexNV: return "OpHitObjectGetInstanceCustomIndexNV";
+ case OpHitObjectGetWorldRayDirectionNV: return "OpHitObjectGetWorldRayDirectionNV";
+ case OpHitObjectGetWorldRayOriginNV: return "OpHitObjectGetWorldRayOriginNV";
+ case OpHitObjectGetRayTMaxNV: return "OpHitObjectGetRayTMaxNV";
+ case OpHitObjectGetRayTMinNV: return "OpHitObjectGetRayTMinNV";
+ case OpHitObjectIsEmptyNV: return "OpHitObjectIsEmptyNV";
+ case OpHitObjectIsHitNV: return "OpHitObjectIsHitNV";
+ case OpHitObjectIsMissNV: return "OpHitObjectIsMissNV";
+ case OpReorderThreadWithHitObjectNV: return "OpReorderThreadWithHitObjectNV";
+ case OpReorderThreadWithHintNV: return "OpReorderThreadWithHintNV";
+ case OpTypeHitObjectNV: return "OpTypeHitObjectNV";
+ case OpImageSampleFootprintNV: return "OpImageSampleFootprintNV";
+ case OpTypeCooperativeVectorNV: return "OpTypeCooperativeVectorNV";
+ case OpCooperativeVectorMatrixMulNV: return "OpCooperativeVectorMatrixMulNV";
+ case OpCooperativeVectorOuterProductAccumulateNV: return "OpCooperativeVectorOuterProductAccumulateNV";
+ case OpCooperativeVectorReduceSumAccumulateNV: return "OpCooperativeVectorReduceSumAccumulateNV";
+ case OpCooperativeVectorMatrixMulAddNV: return "OpCooperativeVectorMatrixMulAddNV";
+ case OpCooperativeMatrixConvertNV: return "OpCooperativeMatrixConvertNV";
+ case OpEmitMeshTasksEXT: return "OpEmitMeshTasksEXT";
+ case OpSetMeshOutputsEXT: return "OpSetMeshOutputsEXT";
+ case OpGroupNonUniformPartitionNV: return "OpGroupNonUniformPartitionNV";
+ case OpWritePackedPrimitiveIndices4x8NV: return "OpWritePackedPrimitiveIndices4x8NV";
+ case OpFetchMicroTriangleVertexPositionNV: return "OpFetchMicroTriangleVertexPositionNV";
+ case OpFetchMicroTriangleVertexBarycentricNV: return "OpFetchMicroTriangleVertexBarycentricNV";
+ case OpCooperativeVectorLoadNV: return "OpCooperativeVectorLoadNV";
+ case OpCooperativeVectorStoreNV: return "OpCooperativeVectorStoreNV";
+ case OpReportIntersectionKHR: return "OpReportIntersectionKHR";
+ case OpIgnoreIntersectionNV: return "OpIgnoreIntersectionNV";
+ case OpTerminateRayNV: return "OpTerminateRayNV";
+ case OpTraceNV: return "OpTraceNV";
+ case OpTraceMotionNV: return "OpTraceMotionNV";
+ case OpTraceRayMotionNV: return "OpTraceRayMotionNV";
+ case OpRayQueryGetIntersectionTriangleVertexPositionsKHR: return "OpRayQueryGetIntersectionTriangleVertexPositionsKHR";
+ case OpTypeAccelerationStructureKHR: return "OpTypeAccelerationStructureKHR";
+ case OpExecuteCallableNV: return "OpExecuteCallableNV";
+ case OpRayQueryGetClusterIdNV: return "OpRayQueryGetClusterIdNV";
+ case OpHitObjectGetClusterIdNV: return "OpHitObjectGetClusterIdNV";
+ case OpTypeCooperativeMatrixNV: return "OpTypeCooperativeMatrixNV";
+ case OpCooperativeMatrixLoadNV: return "OpCooperativeMatrixLoadNV";
+ case OpCooperativeMatrixStoreNV: return "OpCooperativeMatrixStoreNV";
+ case OpCooperativeMatrixMulAddNV: return "OpCooperativeMatrixMulAddNV";
+ case OpCooperativeMatrixLengthNV: return "OpCooperativeMatrixLengthNV";
+ case OpBeginInvocationInterlockEXT: return "OpBeginInvocationInterlockEXT";
+ case OpEndInvocationInterlockEXT: return "OpEndInvocationInterlockEXT";
+ case OpCooperativeMatrixReduceNV: return "OpCooperativeMatrixReduceNV";
+ case OpCooperativeMatrixLoadTensorNV: return "OpCooperativeMatrixLoadTensorNV";
+ case OpCooperativeMatrixStoreTensorNV: return "OpCooperativeMatrixStoreTensorNV";
+ case OpCooperativeMatrixPerElementOpNV: return "OpCooperativeMatrixPerElementOpNV";
+ case OpTypeTensorLayoutNV: return "OpTypeTensorLayoutNV";
+ case OpTypeTensorViewNV: return "OpTypeTensorViewNV";
+ case OpCreateTensorLayoutNV: return "OpCreateTensorLayoutNV";
+ case OpTensorLayoutSetDimensionNV: return "OpTensorLayoutSetDimensionNV";
+ case OpTensorLayoutSetStrideNV: return "OpTensorLayoutSetStrideNV";
+ case OpTensorLayoutSliceNV: return "OpTensorLayoutSliceNV";
+ case OpTensorLayoutSetClampValueNV: return "OpTensorLayoutSetClampValueNV";
+ case OpCreateTensorViewNV: return "OpCreateTensorViewNV";
+ case OpTensorViewSetDimensionNV: return "OpTensorViewSetDimensionNV";
+ case OpTensorViewSetStrideNV: return "OpTensorViewSetStrideNV";
+ case OpDemoteToHelperInvocation: return "OpDemoteToHelperInvocation";
+ case OpIsHelperInvocationEXT: return "OpIsHelperInvocationEXT";
+ case OpTensorViewSetClipNV: return "OpTensorViewSetClipNV";
+ case OpTensorLayoutSetBlockSizeNV: return "OpTensorLayoutSetBlockSizeNV";
+ case OpCooperativeMatrixTransposeNV: return "OpCooperativeMatrixTransposeNV";
+ case OpConvertUToImageNV: return "OpConvertUToImageNV";
+ case OpConvertUToSamplerNV: return "OpConvertUToSamplerNV";
+ case OpConvertImageToUNV: return "OpConvertImageToUNV";
+ case OpConvertSamplerToUNV: return "OpConvertSamplerToUNV";
+ case OpConvertUToSampledImageNV: return "OpConvertUToSampledImageNV";
+ case OpConvertSampledImageToUNV: return "OpConvertSampledImageToUNV";
+ case OpSamplerImageAddressingModeNV: return "OpSamplerImageAddressingModeNV";
+ case OpRawAccessChainNV: return "OpRawAccessChainNV";
+ case OpRayQueryGetIntersectionSpherePositionNV: return "OpRayQueryGetIntersectionSpherePositionNV";
+ case OpRayQueryGetIntersectionSphereRadiusNV: return "OpRayQueryGetIntersectionSphereRadiusNV";
+ case OpRayQueryGetIntersectionLSSPositionsNV: return "OpRayQueryGetIntersectionLSSPositionsNV";
+ case OpRayQueryGetIntersectionLSSRadiiNV: return "OpRayQueryGetIntersectionLSSRadiiNV";
+ case OpRayQueryGetIntersectionLSSHitValueNV: return "OpRayQueryGetIntersectionLSSHitValueNV";
+ case OpHitObjectGetSpherePositionNV: return "OpHitObjectGetSpherePositionNV";
+ case OpHitObjectGetSphereRadiusNV: return "OpHitObjectGetSphereRadiusNV";
+ case OpHitObjectGetLSSPositionsNV: return "OpHitObjectGetLSSPositionsNV";
+ case OpHitObjectGetLSSRadiiNV: return "OpHitObjectGetLSSRadiiNV";
+ case OpHitObjectIsSphereHitNV: return "OpHitObjectIsSphereHitNV";
+ case OpHitObjectIsLSSHitNV: return "OpHitObjectIsLSSHitNV";
+ case OpRayQueryIsSphereHitNV: return "OpRayQueryIsSphereHitNV";
+ case OpRayQueryIsLSSHitNV: return "OpRayQueryIsLSSHitNV";
+ case OpSubgroupShuffleINTEL: return "OpSubgroupShuffleINTEL";
+ case OpSubgroupShuffleDownINTEL: return "OpSubgroupShuffleDownINTEL";
+ case OpSubgroupShuffleUpINTEL: return "OpSubgroupShuffleUpINTEL";
+ case OpSubgroupShuffleXorINTEL: return "OpSubgroupShuffleXorINTEL";
+ case OpSubgroupBlockReadINTEL: return "OpSubgroupBlockReadINTEL";
+ case OpSubgroupBlockWriteINTEL: return "OpSubgroupBlockWriteINTEL";
+ case OpSubgroupImageBlockReadINTEL: return "OpSubgroupImageBlockReadINTEL";
+ case OpSubgroupImageBlockWriteINTEL: return "OpSubgroupImageBlockWriteINTEL";
+ case OpSubgroupImageMediaBlockReadINTEL: return "OpSubgroupImageMediaBlockReadINTEL";
+ case OpSubgroupImageMediaBlockWriteINTEL: return "OpSubgroupImageMediaBlockWriteINTEL";
+ case OpUCountLeadingZerosINTEL: return "OpUCountLeadingZerosINTEL";
+ case OpUCountTrailingZerosINTEL: return "OpUCountTrailingZerosINTEL";
+ case OpAbsISubINTEL: return "OpAbsISubINTEL";
+ case OpAbsUSubINTEL: return "OpAbsUSubINTEL";
+ case OpIAddSatINTEL: return "OpIAddSatINTEL";
+ case OpUAddSatINTEL: return "OpUAddSatINTEL";
+ case OpIAverageINTEL: return "OpIAverageINTEL";
+ case OpUAverageINTEL: return "OpUAverageINTEL";
+ case OpIAverageRoundedINTEL: return "OpIAverageRoundedINTEL";
+ case OpUAverageRoundedINTEL: return "OpUAverageRoundedINTEL";
+ case OpISubSatINTEL: return "OpISubSatINTEL";
+ case OpUSubSatINTEL: return "OpUSubSatINTEL";
+ case OpIMul32x16INTEL: return "OpIMul32x16INTEL";
+ case OpUMul32x16INTEL: return "OpUMul32x16INTEL";
+ case OpConstantFunctionPointerINTEL: return "OpConstantFunctionPointerINTEL";
+ case OpFunctionPointerCallINTEL: return "OpFunctionPointerCallINTEL";
+ case OpAsmTargetINTEL: return "OpAsmTargetINTEL";
+ case OpAsmINTEL: return "OpAsmINTEL";
+ case OpAsmCallINTEL: return "OpAsmCallINTEL";
+ case OpAtomicFMinEXT: return "OpAtomicFMinEXT";
+ case OpAtomicFMaxEXT: return "OpAtomicFMaxEXT";
+ case OpAssumeTrueKHR: return "OpAssumeTrueKHR";
+ case OpExpectKHR: return "OpExpectKHR";
+ case OpDecorateString: return "OpDecorateString";
+ case OpMemberDecorateString: return "OpMemberDecorateString";
+ case OpVmeImageINTEL: return "OpVmeImageINTEL";
+ case OpTypeVmeImageINTEL: return "OpTypeVmeImageINTEL";
+ case OpTypeAvcImePayloadINTEL: return "OpTypeAvcImePayloadINTEL";
+ case OpTypeAvcRefPayloadINTEL: return "OpTypeAvcRefPayloadINTEL";
+ case OpTypeAvcSicPayloadINTEL: return "OpTypeAvcSicPayloadINTEL";
+ case OpTypeAvcMcePayloadINTEL: return "OpTypeAvcMcePayloadINTEL";
+ case OpTypeAvcMceResultINTEL: return "OpTypeAvcMceResultINTEL";
+ case OpTypeAvcImeResultINTEL: return "OpTypeAvcImeResultINTEL";
+ case OpTypeAvcImeResultSingleReferenceStreamoutINTEL: return "OpTypeAvcImeResultSingleReferenceStreamoutINTEL";
+ case OpTypeAvcImeResultDualReferenceStreamoutINTEL: return "OpTypeAvcImeResultDualReferenceStreamoutINTEL";
+ case OpTypeAvcImeSingleReferenceStreaminINTEL: return "OpTypeAvcImeSingleReferenceStreaminINTEL";
+ case OpTypeAvcImeDualReferenceStreaminINTEL: return "OpTypeAvcImeDualReferenceStreaminINTEL";
+ case OpTypeAvcRefResultINTEL: return "OpTypeAvcRefResultINTEL";
+ case OpTypeAvcSicResultINTEL: return "OpTypeAvcSicResultINTEL";
+ case OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultInterBaseMultiReferencePenaltyINTEL";
+ case OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL: return "OpSubgroupAvcMceSetInterBaseMultiReferencePenaltyINTEL";
+ case OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultInterShapePenaltyINTEL";
+ case OpSubgroupAvcMceSetInterShapePenaltyINTEL: return "OpSubgroupAvcMceSetInterShapePenaltyINTEL";
+ case OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL: return "OpSubgroupAvcMceGetDefaultInterDirectionPenaltyINTEL";
+ case OpSubgroupAvcMceSetInterDirectionPenaltyINTEL: return "OpSubgroupAvcMceSetInterDirectionPenaltyINTEL";
+ case OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultIntraLumaShapePenaltyINTEL";
+ case OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL: return "OpSubgroupAvcMceGetDefaultInterMotionVectorCostTableINTEL";
+ case OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL: return "OpSubgroupAvcMceGetDefaultHighPenaltyCostTableINTEL";
+ case OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL: return "OpSubgroupAvcMceGetDefaultMediumPenaltyCostTableINTEL";
+ case OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL: return "OpSubgroupAvcMceGetDefaultLowPenaltyCostTableINTEL";
+ case OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL: return "OpSubgroupAvcMceSetMotionVectorCostFunctionINTEL";
+ case OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultIntraLumaModePenaltyINTEL";
+ case OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL: return "OpSubgroupAvcMceGetDefaultNonDcLumaIntraPenaltyINTEL";
+ case OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL: return "OpSubgroupAvcMceGetDefaultIntraChromaModeBasePenaltyINTEL";
+ case OpSubgroupAvcMceSetAcOnlyHaarINTEL: return "OpSubgroupAvcMceSetAcOnlyHaarINTEL";
+ case OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL: return "OpSubgroupAvcMceSetSourceInterlacedFieldPolarityINTEL";
+ case OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL: return "OpSubgroupAvcMceSetSingleReferenceInterlacedFieldPolarityINTEL";
+ case OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL: return "OpSubgroupAvcMceSetDualReferenceInterlacedFieldPolaritiesINTEL";
+ case OpSubgroupAvcMceConvertToImePayloadINTEL: return "OpSubgroupAvcMceConvertToImePayloadINTEL";
+ case OpSubgroupAvcMceConvertToImeResultINTEL: return "OpSubgroupAvcMceConvertToImeResultINTEL";
+ case OpSubgroupAvcMceConvertToRefPayloadINTEL: return "OpSubgroupAvcMceConvertToRefPayloadINTEL";
+ case OpSubgroupAvcMceConvertToRefResultINTEL: return "OpSubgroupAvcMceConvertToRefResultINTEL";
+ case OpSubgroupAvcMceConvertToSicPayloadINTEL: return "OpSubgroupAvcMceConvertToSicPayloadINTEL";
+ case OpSubgroupAvcMceConvertToSicResultINTEL: return "OpSubgroupAvcMceConvertToSicResultINTEL";
+ case OpSubgroupAvcMceGetMotionVectorsINTEL: return "OpSubgroupAvcMceGetMotionVectorsINTEL";
+ case OpSubgroupAvcMceGetInterDistortionsINTEL: return "OpSubgroupAvcMceGetInterDistortionsINTEL";
+ case OpSubgroupAvcMceGetBestInterDistortionsINTEL: return "OpSubgroupAvcMceGetBestInterDistortionsINTEL";
+ case OpSubgroupAvcMceGetInterMajorShapeINTEL: return "OpSubgroupAvcMceGetInterMajorShapeINTEL";
+ case OpSubgroupAvcMceGetInterMinorShapeINTEL: return "OpSubgroupAvcMceGetInterMinorShapeINTEL";
+ case OpSubgroupAvcMceGetInterDirectionsINTEL: return "OpSubgroupAvcMceGetInterDirectionsINTEL";
+ case OpSubgroupAvcMceGetInterMotionVectorCountINTEL: return "OpSubgroupAvcMceGetInterMotionVectorCountINTEL";
+ case OpSubgroupAvcMceGetInterReferenceIdsINTEL: return "OpSubgroupAvcMceGetInterReferenceIdsINTEL";
+ case OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL: return "OpSubgroupAvcMceGetInterReferenceInterlacedFieldPolaritiesINTEL";
+ case OpSubgroupAvcImeInitializeINTEL: return "OpSubgroupAvcImeInitializeINTEL";
+ case OpSubgroupAvcImeSetSingleReferenceINTEL: return "OpSubgroupAvcImeSetSingleReferenceINTEL";
+ case OpSubgroupAvcImeSetDualReferenceINTEL: return "OpSubgroupAvcImeSetDualReferenceINTEL";
+ case OpSubgroupAvcImeRefWindowSizeINTEL: return "OpSubgroupAvcImeRefWindowSizeINTEL";
+ case OpSubgroupAvcImeAdjustRefOffsetINTEL: return "OpSubgroupAvcImeAdjustRefOffsetINTEL";
+ case OpSubgroupAvcImeConvertToMcePayloadINTEL: return "OpSubgroupAvcImeConvertToMcePayloadINTEL";
+ case OpSubgroupAvcImeSetMaxMotionVectorCountINTEL: return "OpSubgroupAvcImeSetMaxMotionVectorCountINTEL";
+ case OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL: return "OpSubgroupAvcImeSetUnidirectionalMixDisableINTEL";
+ case OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL: return "OpSubgroupAvcImeSetEarlySearchTerminationThresholdINTEL";
+ case OpSubgroupAvcImeSetWeightedSadINTEL: return "OpSubgroupAvcImeSetWeightedSadINTEL";
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceINTEL";
+ case OpSubgroupAvcImeEvaluateWithDualReferenceINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceINTEL";
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminINTEL";
+ case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceStreaminINTEL";
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceStreamoutINTEL";
+ case OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceStreamoutINTEL";
+ case OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL: return "OpSubgroupAvcImeEvaluateWithSingleReferenceStreaminoutINTEL";
+ case OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL: return "OpSubgroupAvcImeEvaluateWithDualReferenceStreaminoutINTEL";
+ case OpSubgroupAvcImeConvertToMceResultINTEL: return "OpSubgroupAvcImeConvertToMceResultINTEL";
+ case OpSubgroupAvcImeGetSingleReferenceStreaminINTEL: return "OpSubgroupAvcImeGetSingleReferenceStreaminINTEL";
+ case OpSubgroupAvcImeGetDualReferenceStreaminINTEL: return "OpSubgroupAvcImeGetDualReferenceStreaminINTEL";
+ case OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL: return "OpSubgroupAvcImeStripSingleReferenceStreamoutINTEL";
+ case OpSubgroupAvcImeStripDualReferenceStreamoutINTEL: return "OpSubgroupAvcImeStripDualReferenceStreamoutINTEL";
+ case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL: return "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeMotionVectorsINTEL";
+ case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL: return "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeDistortionsINTEL";
+ case OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL: return "OpSubgroupAvcImeGetStreamoutSingleReferenceMajorShapeReferenceIdsINTEL";
+ case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL: return "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeMotionVectorsINTEL";
+ case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL: return "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeDistortionsINTEL";
+ case OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL: return "OpSubgroupAvcImeGetStreamoutDualReferenceMajorShapeReferenceIdsINTEL";
+ case OpSubgroupAvcImeGetBorderReachedINTEL: return "OpSubgroupAvcImeGetBorderReachedINTEL";
+ case OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL: return "OpSubgroupAvcImeGetTruncatedSearchIndicationINTEL";
+ case OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL: return "OpSubgroupAvcImeGetUnidirectionalEarlySearchTerminationINTEL";
+ case OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL: return "OpSubgroupAvcImeGetWeightingPatternMinimumMotionVectorINTEL";
+ case OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL: return "OpSubgroupAvcImeGetWeightingPatternMinimumDistortionINTEL";
+ case OpSubgroupAvcFmeInitializeINTEL: return "OpSubgroupAvcFmeInitializeINTEL";
+ case OpSubgroupAvcBmeInitializeINTEL: return "OpSubgroupAvcBmeInitializeINTEL";
+ case OpSubgroupAvcRefConvertToMcePayloadINTEL: return "OpSubgroupAvcRefConvertToMcePayloadINTEL";
+ case OpSubgroupAvcRefSetBidirectionalMixDisableINTEL: return "OpSubgroupAvcRefSetBidirectionalMixDisableINTEL";
+ case OpSubgroupAvcRefSetBilinearFilterEnableINTEL: return "OpSubgroupAvcRefSetBilinearFilterEnableINTEL";
+ case OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL: return "OpSubgroupAvcRefEvaluateWithSingleReferenceINTEL";
+ case OpSubgroupAvcRefEvaluateWithDualReferenceINTEL: return "OpSubgroupAvcRefEvaluateWithDualReferenceINTEL";
+ case OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL: return "OpSubgroupAvcRefEvaluateWithMultiReferenceINTEL";
+ case OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL: return "OpSubgroupAvcRefEvaluateWithMultiReferenceInterlacedINTEL";
+ case OpSubgroupAvcRefConvertToMceResultINTEL: return "OpSubgroupAvcRefConvertToMceResultINTEL";
+ case OpSubgroupAvcSicInitializeINTEL: return "OpSubgroupAvcSicInitializeINTEL";
+ case OpSubgroupAvcSicConfigureSkcINTEL: return "OpSubgroupAvcSicConfigureSkcINTEL";
+ case OpSubgroupAvcSicConfigureIpeLumaINTEL: return "OpSubgroupAvcSicConfigureIpeLumaINTEL";
+ case OpSubgroupAvcSicConfigureIpeLumaChromaINTEL: return "OpSubgroupAvcSicConfigureIpeLumaChromaINTEL";
+ case OpSubgroupAvcSicGetMotionVectorMaskINTEL: return "OpSubgroupAvcSicGetMotionVectorMaskINTEL";
+ case OpSubgroupAvcSicConvertToMcePayloadINTEL: return "OpSubgroupAvcSicConvertToMcePayloadINTEL";
+ case OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL: return "OpSubgroupAvcSicSetIntraLumaShapePenaltyINTEL";
+ case OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL: return "OpSubgroupAvcSicSetIntraLumaModeCostFunctionINTEL";
+ case OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL: return "OpSubgroupAvcSicSetIntraChromaModeCostFunctionINTEL";
+ case OpSubgroupAvcSicSetBilinearFilterEnableINTEL: return "OpSubgroupAvcSicSetBilinearFilterEnableINTEL";
+ case OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL: return "OpSubgroupAvcSicSetSkcForwardTransformEnableINTEL";
+ case OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL: return "OpSubgroupAvcSicSetBlockBasedRawSkipSadINTEL";
+ case OpSubgroupAvcSicEvaluateIpeINTEL: return "OpSubgroupAvcSicEvaluateIpeINTEL";
+ case OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL: return "OpSubgroupAvcSicEvaluateWithSingleReferenceINTEL";
+ case OpSubgroupAvcSicEvaluateWithDualReferenceINTEL: return "OpSubgroupAvcSicEvaluateWithDualReferenceINTEL";
+ case OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL: return "OpSubgroupAvcSicEvaluateWithMultiReferenceINTEL";
+ case OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL: return "OpSubgroupAvcSicEvaluateWithMultiReferenceInterlacedINTEL";
+ case OpSubgroupAvcSicConvertToMceResultINTEL: return "OpSubgroupAvcSicConvertToMceResultINTEL";
+ case OpSubgroupAvcSicGetIpeLumaShapeINTEL: return "OpSubgroupAvcSicGetIpeLumaShapeINTEL";
+ case OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL: return "OpSubgroupAvcSicGetBestIpeLumaDistortionINTEL";
+ case OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL: return "OpSubgroupAvcSicGetBestIpeChromaDistortionINTEL";
+ case OpSubgroupAvcSicGetPackedIpeLumaModesINTEL: return "OpSubgroupAvcSicGetPackedIpeLumaModesINTEL";
+ case OpSubgroupAvcSicGetIpeChromaModeINTEL: return "OpSubgroupAvcSicGetIpeChromaModeINTEL";
+ case OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL: return "OpSubgroupAvcSicGetPackedSkcLumaCountThresholdINTEL";
+ case OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL: return "OpSubgroupAvcSicGetPackedSkcLumaSumThresholdINTEL";
+ case OpSubgroupAvcSicGetInterRawSadsINTEL: return "OpSubgroupAvcSicGetInterRawSadsINTEL";
+ case OpVariableLengthArrayINTEL: return "OpVariableLengthArrayINTEL";
+ case OpSaveMemoryINTEL: return "OpSaveMemoryINTEL";
+ case OpRestoreMemoryINTEL: return "OpRestoreMemoryINTEL";
+ case OpArbitraryFloatSinCosPiINTEL: return "OpArbitraryFloatSinCosPiINTEL";
+ case OpArbitraryFloatCastINTEL: return "OpArbitraryFloatCastINTEL";
+ case OpArbitraryFloatCastFromIntINTEL: return "OpArbitraryFloatCastFromIntINTEL";
+ case OpArbitraryFloatCastToIntINTEL: return "OpArbitraryFloatCastToIntINTEL";
+ case OpArbitraryFloatAddINTEL: return "OpArbitraryFloatAddINTEL";
+ case OpArbitraryFloatSubINTEL: return "OpArbitraryFloatSubINTEL";
+ case OpArbitraryFloatMulINTEL: return "OpArbitraryFloatMulINTEL";
+ case OpArbitraryFloatDivINTEL: return "OpArbitraryFloatDivINTEL";
+ case OpArbitraryFloatGTINTEL: return "OpArbitraryFloatGTINTEL";
+ case OpArbitraryFloatGEINTEL: return "OpArbitraryFloatGEINTEL";
+ case OpArbitraryFloatLTINTEL: return "OpArbitraryFloatLTINTEL";
+ case OpArbitraryFloatLEINTEL: return "OpArbitraryFloatLEINTEL";
+ case OpArbitraryFloatEQINTEL: return "OpArbitraryFloatEQINTEL";
+ case OpArbitraryFloatRecipINTEL: return "OpArbitraryFloatRecipINTEL";
+ case OpArbitraryFloatRSqrtINTEL: return "OpArbitraryFloatRSqrtINTEL";
+ case OpArbitraryFloatCbrtINTEL: return "OpArbitraryFloatCbrtINTEL";
+ case OpArbitraryFloatHypotINTEL: return "OpArbitraryFloatHypotINTEL";
+ case OpArbitraryFloatSqrtINTEL: return "OpArbitraryFloatSqrtINTEL";
+ case OpArbitraryFloatLogINTEL: return "OpArbitraryFloatLogINTEL";
+ case OpArbitraryFloatLog2INTEL: return "OpArbitraryFloatLog2INTEL";
+ case OpArbitraryFloatLog10INTEL: return "OpArbitraryFloatLog10INTEL";
+ case OpArbitraryFloatLog1pINTEL: return "OpArbitraryFloatLog1pINTEL";
+ case OpArbitraryFloatExpINTEL: return "OpArbitraryFloatExpINTEL";
+ case OpArbitraryFloatExp2INTEL: return "OpArbitraryFloatExp2INTEL";
+ case OpArbitraryFloatExp10INTEL: return "OpArbitraryFloatExp10INTEL";
+ case OpArbitraryFloatExpm1INTEL: return "OpArbitraryFloatExpm1INTEL";
+ case OpArbitraryFloatSinINTEL: return "OpArbitraryFloatSinINTEL";
+ case OpArbitraryFloatCosINTEL: return "OpArbitraryFloatCosINTEL";
+ case OpArbitraryFloatSinCosINTEL: return "OpArbitraryFloatSinCosINTEL";
+ case OpArbitraryFloatSinPiINTEL: return "OpArbitraryFloatSinPiINTEL";
+ case OpArbitraryFloatCosPiINTEL: return "OpArbitraryFloatCosPiINTEL";
+ case OpArbitraryFloatASinINTEL: return "OpArbitraryFloatASinINTEL";
+ case OpArbitraryFloatASinPiINTEL: return "OpArbitraryFloatASinPiINTEL";
+ case OpArbitraryFloatACosINTEL: return "OpArbitraryFloatACosINTEL";
+ case OpArbitraryFloatACosPiINTEL: return "OpArbitraryFloatACosPiINTEL";
+ case OpArbitraryFloatATanINTEL: return "OpArbitraryFloatATanINTEL";
+ case OpArbitraryFloatATanPiINTEL: return "OpArbitraryFloatATanPiINTEL";
+ case OpArbitraryFloatATan2INTEL: return "OpArbitraryFloatATan2INTEL";
+ case OpArbitraryFloatPowINTEL: return "OpArbitraryFloatPowINTEL";
+ case OpArbitraryFloatPowRINTEL: return "OpArbitraryFloatPowRINTEL";
+ case OpArbitraryFloatPowNINTEL: return "OpArbitraryFloatPowNINTEL";
+ case OpLoopControlINTEL: return "OpLoopControlINTEL";
+ case OpAliasDomainDeclINTEL: return "OpAliasDomainDeclINTEL";
+ case OpAliasScopeDeclINTEL: return "OpAliasScopeDeclINTEL";
+ case OpAliasScopeListDeclINTEL: return "OpAliasScopeListDeclINTEL";
+ case OpFixedSqrtINTEL: return "OpFixedSqrtINTEL";
+ case OpFixedRecipINTEL: return "OpFixedRecipINTEL";
+ case OpFixedRsqrtINTEL: return "OpFixedRsqrtINTEL";
+ case OpFixedSinINTEL: return "OpFixedSinINTEL";
+ case OpFixedCosINTEL: return "OpFixedCosINTEL";
+ case OpFixedSinCosINTEL: return "OpFixedSinCosINTEL";
+ case OpFixedSinPiINTEL: return "OpFixedSinPiINTEL";
+ case OpFixedCosPiINTEL: return "OpFixedCosPiINTEL";
+ case OpFixedSinCosPiINTEL: return "OpFixedSinCosPiINTEL";
+ case OpFixedLogINTEL: return "OpFixedLogINTEL";
+ case OpFixedExpINTEL: return "OpFixedExpINTEL";
+ case OpPtrCastToCrossWorkgroupINTEL: return "OpPtrCastToCrossWorkgroupINTEL";
+ case OpCrossWorkgroupCastToPtrINTEL: return "OpCrossWorkgroupCastToPtrINTEL";
+ case OpReadPipeBlockingINTEL: return "OpReadPipeBlockingINTEL";
+ case OpWritePipeBlockingINTEL: return "OpWritePipeBlockingINTEL";
+ case OpFPGARegINTEL: return "OpFPGARegINTEL";
+ case OpRayQueryGetRayTMinKHR: return "OpRayQueryGetRayTMinKHR";
+ case OpRayQueryGetRayFlagsKHR: return "OpRayQueryGetRayFlagsKHR";
+ case OpRayQueryGetIntersectionTKHR: return "OpRayQueryGetIntersectionTKHR";
+ case OpRayQueryGetIntersectionInstanceCustomIndexKHR: return "OpRayQueryGetIntersectionInstanceCustomIndexKHR";
+ case OpRayQueryGetIntersectionInstanceIdKHR: return "OpRayQueryGetIntersectionInstanceIdKHR";
+ case OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR: return "OpRayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetKHR";
+ case OpRayQueryGetIntersectionGeometryIndexKHR: return "OpRayQueryGetIntersectionGeometryIndexKHR";
+ case OpRayQueryGetIntersectionPrimitiveIndexKHR: return "OpRayQueryGetIntersectionPrimitiveIndexKHR";
+ case OpRayQueryGetIntersectionBarycentricsKHR: return "OpRayQueryGetIntersectionBarycentricsKHR";
+ case OpRayQueryGetIntersectionFrontFaceKHR: return "OpRayQueryGetIntersectionFrontFaceKHR";
+ case OpRayQueryGetIntersectionCandidateAABBOpaqueKHR: return "OpRayQueryGetIntersectionCandidateAABBOpaqueKHR";
+ case OpRayQueryGetIntersectionObjectRayDirectionKHR: return "OpRayQueryGetIntersectionObjectRayDirectionKHR";
+ case OpRayQueryGetIntersectionObjectRayOriginKHR: return "OpRayQueryGetIntersectionObjectRayOriginKHR";
+ case OpRayQueryGetWorldRayDirectionKHR: return "OpRayQueryGetWorldRayDirectionKHR";
+ case OpRayQueryGetWorldRayOriginKHR: return "OpRayQueryGetWorldRayOriginKHR";
+ case OpRayQueryGetIntersectionObjectToWorldKHR: return "OpRayQueryGetIntersectionObjectToWorldKHR";
+ case OpRayQueryGetIntersectionWorldToObjectKHR: return "OpRayQueryGetIntersectionWorldToObjectKHR";
+ case OpAtomicFAddEXT: return "OpAtomicFAddEXT";
+ case OpTypeBufferSurfaceINTEL: return "OpTypeBufferSurfaceINTEL";
+ case OpTypeStructContinuedINTEL: return "OpTypeStructContinuedINTEL";
+ case OpConstantCompositeContinuedINTEL: return "OpConstantCompositeContinuedINTEL";
+ case OpSpecConstantCompositeContinuedINTEL: return "OpSpecConstantCompositeContinuedINTEL";
+ case OpCompositeConstructContinuedINTEL: return "OpCompositeConstructContinuedINTEL";
+ case OpConvertFToBF16INTEL: return "OpConvertFToBF16INTEL";
+ case OpConvertBF16ToFINTEL: return "OpConvertBF16ToFINTEL";
+ case OpControlBarrierArriveINTEL: return "OpControlBarrierArriveINTEL";
+ case OpControlBarrierWaitINTEL: return "OpControlBarrierWaitINTEL";
+ case OpArithmeticFenceEXT: return "OpArithmeticFenceEXT";
+ case OpTaskSequenceCreateINTEL: return "OpTaskSequenceCreateINTEL";
+ case OpTaskSequenceAsyncINTEL: return "OpTaskSequenceAsyncINTEL";
+ case OpTaskSequenceGetINTEL: return "OpTaskSequenceGetINTEL";
+ case OpTaskSequenceReleaseINTEL: return "OpTaskSequenceReleaseINTEL";
+ case OpTypeTaskSequenceINTEL: return "OpTypeTaskSequenceINTEL";
+ case OpSubgroupBlockPrefetchINTEL: return "OpSubgroupBlockPrefetchINTEL";
+ case OpSubgroup2DBlockLoadINTEL: return "OpSubgroup2DBlockLoadINTEL";
+ case OpSubgroup2DBlockLoadTransformINTEL: return "OpSubgroup2DBlockLoadTransformINTEL";
+ case OpSubgroup2DBlockLoadTransposeINTEL: return "OpSubgroup2DBlockLoadTransposeINTEL";
+ case OpSubgroup2DBlockPrefetchINTEL: return "OpSubgroup2DBlockPrefetchINTEL";
+ case OpSubgroup2DBlockStoreINTEL: return "OpSubgroup2DBlockStoreINTEL";
+ case OpSubgroupMatrixMultiplyAccumulateINTEL: return "OpSubgroupMatrixMultiplyAccumulateINTEL";
+ case OpBitwiseFunctionINTEL: return "OpBitwiseFunctionINTEL";
+ case OpGroupIMulKHR: return "OpGroupIMulKHR";
+ case OpGroupFMulKHR: return "OpGroupFMulKHR";
+ case OpGroupBitwiseAndKHR: return "OpGroupBitwiseAndKHR";
+ case OpGroupBitwiseOrKHR: return "OpGroupBitwiseOrKHR";
+ case OpGroupBitwiseXorKHR: return "OpGroupBitwiseXorKHR";
+ case OpGroupLogicalAndKHR: return "OpGroupLogicalAndKHR";
+ case OpGroupLogicalOrKHR: return "OpGroupLogicalOrKHR";
+ case OpGroupLogicalXorKHR: return "OpGroupLogicalXorKHR";
+ case OpRoundFToTF32INTEL: return "OpRoundFToTF32INTEL";
+ case OpMaskedGatherINTEL: return "OpMaskedGatherINTEL";
+ case OpMaskedScatterINTEL: return "OpMaskedScatterINTEL";
+ case OpConvertHandleToImageINTEL: return "OpConvertHandleToImageINTEL";
+ case OpConvertHandleToSamplerINTEL: return "OpConvertHandleToSamplerINTEL";
+ case OpConvertHandleToSampledImageINTEL: return "OpConvertHandleToSampledImageINTEL";
+ default: return "Unknown";
+ }
+}
+
#endif /* SPV_ENABLE_UTILITY_CODE */
-// Overload operator| for mask bit combining
+// Overload bitwise operators for mask bit combining
inline ImageOperandsMask operator|(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) | unsigned(b)); }
+inline ImageOperandsMask operator&(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) & unsigned(b)); }
+inline ImageOperandsMask operator^(ImageOperandsMask a, ImageOperandsMask b) { return ImageOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline ImageOperandsMask operator~(ImageOperandsMask a) { return ImageOperandsMask(~unsigned(a)); }
inline FPFastMathModeMask operator|(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) | unsigned(b)); }
+inline FPFastMathModeMask operator&(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) & unsigned(b)); }
+inline FPFastMathModeMask operator^(FPFastMathModeMask a, FPFastMathModeMask b) { return FPFastMathModeMask(unsigned(a) ^ unsigned(b)); }
+inline FPFastMathModeMask operator~(FPFastMathModeMask a) { return FPFastMathModeMask(~unsigned(a)); }
inline SelectionControlMask operator|(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) | unsigned(b)); }
+inline SelectionControlMask operator&(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) & unsigned(b)); }
+inline SelectionControlMask operator^(SelectionControlMask a, SelectionControlMask b) { return SelectionControlMask(unsigned(a) ^ unsigned(b)); }
+inline SelectionControlMask operator~(SelectionControlMask a) { return SelectionControlMask(~unsigned(a)); }
inline LoopControlMask operator|(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) | unsigned(b)); }
+inline LoopControlMask operator&(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) & unsigned(b)); }
+inline LoopControlMask operator^(LoopControlMask a, LoopControlMask b) { return LoopControlMask(unsigned(a) ^ unsigned(b)); }
+inline LoopControlMask operator~(LoopControlMask a) { return LoopControlMask(~unsigned(a)); }
inline FunctionControlMask operator|(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) | unsigned(b)); }
+inline FunctionControlMask operator&(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) & unsigned(b)); }
+inline FunctionControlMask operator^(FunctionControlMask a, FunctionControlMask b) { return FunctionControlMask(unsigned(a) ^ unsigned(b)); }
+inline FunctionControlMask operator~(FunctionControlMask a) { return FunctionControlMask(~unsigned(a)); }
inline MemorySemanticsMask operator|(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) | unsigned(b)); }
+inline MemorySemanticsMask operator&(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) & unsigned(b)); }
+inline MemorySemanticsMask operator^(MemorySemanticsMask a, MemorySemanticsMask b) { return MemorySemanticsMask(unsigned(a) ^ unsigned(b)); }
+inline MemorySemanticsMask operator~(MemorySemanticsMask a) { return MemorySemanticsMask(~unsigned(a)); }
inline MemoryAccessMask operator|(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) | unsigned(b)); }
+inline MemoryAccessMask operator&(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) & unsigned(b)); }
+inline MemoryAccessMask operator^(MemoryAccessMask a, MemoryAccessMask b) { return MemoryAccessMask(unsigned(a) ^ unsigned(b)); }
+inline MemoryAccessMask operator~(MemoryAccessMask a) { return MemoryAccessMask(~unsigned(a)); }
inline KernelProfilingInfoMask operator|(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) | unsigned(b)); }
+inline KernelProfilingInfoMask operator&(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) & unsigned(b)); }
+inline KernelProfilingInfoMask operator^(KernelProfilingInfoMask a, KernelProfilingInfoMask b) { return KernelProfilingInfoMask(unsigned(a) ^ unsigned(b)); }
+inline KernelProfilingInfoMask operator~(KernelProfilingInfoMask a) { return KernelProfilingInfoMask(~unsigned(a)); }
inline RayFlagsMask operator|(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) | unsigned(b)); }
+inline RayFlagsMask operator&(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) & unsigned(b)); }
+inline RayFlagsMask operator^(RayFlagsMask a, RayFlagsMask b) { return RayFlagsMask(unsigned(a) ^ unsigned(b)); }
+inline RayFlagsMask operator~(RayFlagsMask a) { return RayFlagsMask(~unsigned(a)); }
inline FragmentShadingRateMask operator|(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) | unsigned(b)); }
+inline FragmentShadingRateMask operator&(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) & unsigned(b)); }
+inline FragmentShadingRateMask operator^(FragmentShadingRateMask a, FragmentShadingRateMask b) { return FragmentShadingRateMask(unsigned(a) ^ unsigned(b)); }
+inline FragmentShadingRateMask operator~(FragmentShadingRateMask a) { return FragmentShadingRateMask(~unsigned(a)); }
+inline CooperativeMatrixOperandsMask operator|(CooperativeMatrixOperandsMask a, CooperativeMatrixOperandsMask b) { return CooperativeMatrixOperandsMask(unsigned(a) | unsigned(b)); }
+inline CooperativeMatrixOperandsMask operator&(CooperativeMatrixOperandsMask a, CooperativeMatrixOperandsMask b) { return CooperativeMatrixOperandsMask(unsigned(a) & unsigned(b)); }
+inline CooperativeMatrixOperandsMask operator^(CooperativeMatrixOperandsMask a, CooperativeMatrixOperandsMask b) { return CooperativeMatrixOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline CooperativeMatrixOperandsMask operator~(CooperativeMatrixOperandsMask a) { return CooperativeMatrixOperandsMask(~unsigned(a)); }
+inline CooperativeMatrixReduceMask operator|(CooperativeMatrixReduceMask a, CooperativeMatrixReduceMask b) { return CooperativeMatrixReduceMask(unsigned(a) | unsigned(b)); }
+inline CooperativeMatrixReduceMask operator&(CooperativeMatrixReduceMask a, CooperativeMatrixReduceMask b) { return CooperativeMatrixReduceMask(unsigned(a) & unsigned(b)); }
+inline CooperativeMatrixReduceMask operator^(CooperativeMatrixReduceMask a, CooperativeMatrixReduceMask b) { return CooperativeMatrixReduceMask(unsigned(a) ^ unsigned(b)); }
+inline CooperativeMatrixReduceMask operator~(CooperativeMatrixReduceMask a) { return CooperativeMatrixReduceMask(~unsigned(a)); }
+inline TensorAddressingOperandsMask operator|(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) | unsigned(b)); }
+inline TensorAddressingOperandsMask operator&(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) & unsigned(b)); }
+inline TensorAddressingOperandsMask operator^(TensorAddressingOperandsMask a, TensorAddressingOperandsMask b) { return TensorAddressingOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline TensorAddressingOperandsMask operator~(TensorAddressingOperandsMask a) { return TensorAddressingOperandsMask(~unsigned(a)); }
+inline TensorOperandsMask operator|(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) | unsigned(b)); }
+inline TensorOperandsMask operator&(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) & unsigned(b)); }
+inline TensorOperandsMask operator^(TensorOperandsMask a, TensorOperandsMask b) { return TensorOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline TensorOperandsMask operator~(TensorOperandsMask a) { return TensorOperandsMask(~unsigned(a)); }
+inline MatrixMultiplyAccumulateOperandsMask operator|(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) | unsigned(b)); }
+inline MatrixMultiplyAccumulateOperandsMask operator&(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) & unsigned(b)); }
+inline MatrixMultiplyAccumulateOperandsMask operator^(MatrixMultiplyAccumulateOperandsMask a, MatrixMultiplyAccumulateOperandsMask b) { return MatrixMultiplyAccumulateOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline MatrixMultiplyAccumulateOperandsMask operator~(MatrixMultiplyAccumulateOperandsMask a) { return MatrixMultiplyAccumulateOperandsMask(~unsigned(a)); }
+inline RawAccessChainOperandsMask operator|(RawAccessChainOperandsMask a, RawAccessChainOperandsMask b) { return RawAccessChainOperandsMask(unsigned(a) | unsigned(b)); }
+inline RawAccessChainOperandsMask operator&(RawAccessChainOperandsMask a, RawAccessChainOperandsMask b) { return RawAccessChainOperandsMask(unsigned(a) & unsigned(b)); }
+inline RawAccessChainOperandsMask operator^(RawAccessChainOperandsMask a, RawAccessChainOperandsMask b) { return RawAccessChainOperandsMask(unsigned(a) ^ unsigned(b)); }
+inline RawAccessChainOperandsMask operator~(RawAccessChainOperandsMask a) { return RawAccessChainOperandsMask(~unsigned(a)); }
} // end namespace spv
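
Beyond operator|, the header now also defines operator&, operator^ and operator~ for every mask enum, so flag sets can be tested, toggled and cleared without round-tripping through unsigned. A minimal usage sketch against the patched spirv.hpp (an illustration, not part of the patch):

    #include <cassert>
    #include "spirv.hpp"

    int main() {
        using namespace spv;
        // Combine flags with |, as before.
        MemorySemanticsMask m = MemorySemanticsAcquireMask | MemorySemanticsWorkgroupMemoryMask;
        // The new & overload allows type-safe membership tests...
        assert((m & MemorySemanticsAcquireMask) == MemorySemanticsAcquireMask);
        // ...while ~ together with & clears a flag, and ^ toggles one.
        m = m & ~MemorySemanticsAcquireMask;
        m = m ^ MemorySemanticsReleaseMask;
        assert((m & MemorySemanticsWorkgroupMemoryMask) != MemorySemanticsMaskNone);
        return 0;
    }
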
diff --git a/thirdparty/spirv-cross/spirv_common.hpp b/thirdparty/spirv-cross/spirv_common.hpp
index b70536d9ec..4780d2750f 100644
--- a/thirdparty/spirv-cross/spirv_common.hpp
+++ b/thirdparty/spirv-cross/spirv_common.hpp
@@ -580,7 +580,10 @@ struct SPIRType : IVariant
Interpolant,
Char,
// MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
- MeshGridProperties
+ MeshGridProperties,
+ BFloat16,
+ FloatE4M3,
+ FloatE5M2
};
// Scalar/vector/matrix support.
@@ -605,6 +608,14 @@ struct SPIRType : IVariant
bool pointer = false;
bool forward_pointer = false;
+ struct
+ {
+ uint32_t use_id = 0;
+ uint32_t rows_id = 0;
+ uint32_t columns_id = 0;
+ uint32_t scope_id = 0;
+ } cooperative;
+
spv::StorageClass storage = spv::StorageClassGeneric;
SmallVector<TypeID> member_types;
@@ -686,6 +697,7 @@ struct SPIREntryPoint
FunctionID self = 0;
std::string name;
std::string orig_name;
+ std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
SmallVector<VariableID> interface_variables;
Bitset flags;
@@ -1026,6 +1038,9 @@ struct SPIRFunction : IVariant
// consider arrays value types.
SmallVector<ID> constant_arrays_needed_on_stack;
+ // Does this function (or any function called by it), emit geometry?
+ bool emits_geometry = false;
+
bool active = false;
bool flush_undeclared = true;
bool do_combined_parameters = true;
@@ -1226,6 +1241,26 @@ struct SPIRConstant : IVariant
return u.f32;
}
+ static inline float fe4m3_to_f32(uint8_t v)
+ {
+ if ((v & 0x7f) == 0x7f)
+ {
+ union
+ {
+ float f32;
+ uint32_t u32;
+ } u;
+
+ u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
+ return u.f32;
+ }
+ else
+ {
+ // Reuse the FP16 to FP32 code. Cute bit-hackery.
+ return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
+ }
+ }
+
inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
{
return m.c[col].id[row];
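
The fe4m3_to_f32 bit trick above is worth a gloss. FE4M3 packs 1 sign, 4 exponent (bias 7) and 3 mantissa bits, has no infinities, and reserves the all-ones pattern 0x7f/0xff as its single NaN per sign, which the first branch handles. For every other value, sign-extending to 16 bits places the sign in both bit 15 and bit 14 after the << 7; the 0xffff ^ 0x4000 mask clears the stray copy in bit 14 (FP16's top exponent bit), the four exponent bits land in FP16's remaining exponent field, and the final * 256.0f = 2^(15 - 7) cancels the bias difference. Subnormals fall out correctly because both formats scale them identically. A standalone reference decoder for cross-checking (an illustrative sketch, not part of the patch):

    #include <cmath>
    #include <cstdint>

    // Direct FE4M3 decode: 1 sign bit, 4 exponent bits (bias 7), 3 mantissa bits.
    float fe4m3_reference(uint8_t v) {
        float sign = (v & 0x80) ? -1.0f : 1.0f;
        if ((v & 0x7f) == 0x7f)
            return sign * NAN; // sole NaN encoding; FE4M3 has no infinities
        int exp = (v >> 3) & 0xf;
        float mant = float(v & 0x7) / 8.0f;
        if (exp == 0)
            return sign * std::ldexp(mant, 1 - 7); // subnormal
        return sign * std::ldexp(1.0f + mant, exp - 7);
    }
    // Example: 0x38 (exp 7, mantissa 0) decodes to 1.0f through both paths.
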
@@ -1266,6 +1301,24 @@ struct SPIRConstant : IVariant
return f16_to_f32(scalar_u16(col, row));
}
+ inline float scalar_bf16(uint32_t col = 0, uint32_t row = 0) const
+ {
+ uint32_t v = scalar_u16(col, row) << 16;
+ float fp32;
+ memcpy(&fp32, &v, sizeof(float));
+ return fp32;
+ }
+
+ inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
+ {
+ return fe4m3_to_f32(scalar_u8(col, row));
+ }
+
+ inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
+ {
+ return f16_to_f32(scalar_u8(col, row) << 8);
+ }
+
inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
{
return m.c[col].r[row].f32;
@@ -1336,9 +1389,10 @@ struct SPIRConstant : IVariant
SPIRConstant() = default;
- SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
+ SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
: constant_type(constant_type_)
, specialization(specialized)
+ , replicated(replicated_)
{
subconstants.reserve(num_elements);
for (uint32_t i = 0; i < num_elements; i++)
@@ -1410,9 +1464,16 @@ struct SPIRConstant : IVariant
// If true, this is a LUT, and should always be declared in the outer scope.
bool is_used_as_lut = false;
+ // If this is a null constant of an array type whose length is a specialization constant.
+ // It may require special handling in the initializer.
+ bool is_null_array_specialized_length = false;
+
// For composites which are constant arrays, etc.
SmallVector<ID> subconstants;
+ // Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
+ bool replicated = false;
+
// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
// and uses them to initialize the constant. This allows the user
// to still be able to specialize the value by supplying corresponding
@@ -1708,6 +1769,7 @@ struct Meta
uint32_t spec_id = 0;
uint32_t index = 0;
spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
+ spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
bool builtin = false;
bool qualified_alias_explicit_override = false;
diff --git a/thirdparty/spirv-cross/spirv_cross.cpp b/thirdparty/spirv-cross/spirv_cross.cpp
index 3492f0b3ed..350eff3429 100644
--- a/thirdparty/spirv-cross/spirv_cross.cpp
+++ b/thirdparty/spirv-cross/spirv_cross.cpp
@@ -82,7 +82,7 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
bool image = type.basetype == SPIRType::Image;
bool counter = type.basetype == SPIRType::AtomicCounter;
- bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
+ bool buffer_reference = type.storage == StorageClassPhysicalStorageBuffer;
bool is_restrict;
if (ssbo)
@@ -171,6 +171,7 @@ bool Compiler::block_is_control_dependent(const SPIRBlock &block)
case OpGroupNonUniformLogicalXor:
case OpGroupNonUniformQuadBroadcast:
case OpGroupNonUniformQuadSwap:
+ case OpGroupNonUniformRotateKHR:
// Control barriers
case OpControlBarrier:
@@ -210,6 +211,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
case OpCopyMemory:
case OpStore:
+ case OpCooperativeMatrixStoreKHR:
{
auto &type = expression_type(ops[0]);
if (type.storage != StorageClassFunction)
@@ -370,6 +372,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
}
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
case OpImageRead:
{
// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -481,7 +484,7 @@ void Compiler::register_write(uint32_t chain)
}
}
- if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
+ if (type.storage == StorageClassPhysicalStorageBuffer || variable_storage_is_aliased(*var))
flush_all_aliased_variables();
else if (var)
flush_dependees(*var);
@@ -587,6 +590,7 @@ const SPIRType &Compiler::expression_type(uint32_t id) const
bool Compiler::expression_is_lvalue(uint32_t id) const
{
auto &type = expression_type(id);
+
switch (type.basetype)
{
case SPIRType::SampledImage:
@@ -818,6 +822,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
case OpAtomicStore:
case OpStore:
+ case OpCooperativeMatrixStoreKHR:
// Invalid SPIR-V.
if (length < 1)
return false;
@@ -910,6 +915,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
case OpInBoundsAccessChain:
case OpPtrAccessChain:
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
case OpCopyObject:
case OpImageTexelPointer:
case OpAtomicLoad:
@@ -2364,6 +2370,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
execution.output_primitives = arg0;
break;
+ case ExecutionModeFPFastMathDefault:
+ execution.fp_fast_math_defaults[arg0] = arg1;
+ break;
+
default:
break;
}
@@ -3461,6 +3471,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
switch (op)
{
case OpStore:
+ case OpCooperativeMatrixStoreKHR:
{
if (length < 2)
return false;
@@ -3581,6 +3592,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
}
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
{
if (length < 3)
return false;
@@ -3800,6 +3812,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
switch (op)
{
case OpStore:
+ case OpCooperativeMatrixStoreKHR:
if (length < 2)
return false;
if (args[0] == variable_id)
@@ -3810,6 +3823,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
break;
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
if (length < 3)
return false;
if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
@@ -4285,6 +4299,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
switch (op.op)
{
case OpStore:
+ case OpCooperativeMatrixStoreKHR:
case OpCopyMemory:
if (ops[0] == var)
return false;
@@ -4323,6 +4338,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
case OpCopyObject:
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
if (ops[2] == var)
return true;
break;
@@ -4350,6 +4366,39 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
return true;
}
+bool Compiler::GeometryEmitDiscoveryHandler::handle(spv::Op opcode, const uint32_t *, uint32_t)
+{
+ if (opcode == OpEmitVertex || opcode == OpEndPrimitive)
+ {
+ for (auto *func : function_stack)
+ func->emits_geometry = true;
+ }
+
+ return true;
+}
+
+bool Compiler::GeometryEmitDiscoveryHandler::begin_function_scope(const uint32_t *stream, uint32_t)
+{
+ auto &callee = compiler.get(stream[2]);
+ function_stack.push_back(&callee);
+ return true;
+}
+
+bool Compiler::GeometryEmitDiscoveryHandler::end_function_scope([[maybe_unused]] const uint32_t *stream, uint32_t)
+{
+ assert(function_stack.back() == &compiler.get<SPIRFunction>(stream[2]));
+ function_stack.pop_back();
+
+ return true;
+}
+
+void Compiler::discover_geometry_emitters()
+{
+ GeometryEmitDiscoveryHandler handler(*this);
+
+ traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
+}
+
Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
return ir.get_buffer_block_flags(get<SPIRVariable>(id));
@@ -4462,6 +4511,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
switch (opcode)
{
case OpStore:
+ case OpCooperativeMatrixStoreKHR:
if (length < 1)
return false;
@@ -4478,6 +4528,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
case OpCopyObject:
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
if (length < 3)
return false;
@@ -4910,13 +4961,16 @@ void Compiler::make_constant_null(uint32_t id, uint32_t type)
uint32_t parent_id = ir.increase_bound_by(1);
make_constant_null(parent_id, constant_type.parent_type);
- if (!constant_type.array_size_literal.back())
- SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
-
- SmallVector<uint32_t> elements(constant_type.array.back());
- for (uint32_t i = 0; i < constant_type.array.back(); i++)
+ // The array size of OpConstantNull can be either a literal or a specialization constant.
+ // In the latter case, we cannot take the value as-is, as it can be changed to anything.
+ // Rather, we assume it to be *one* for the sake of the initializer.
+ bool is_literal_array_size = constant_type.array_size_literal.back();
+ uint32_t count = is_literal_array_size ? constant_type.array.back() : 1;
+ SmallVector<uint32_t> elements(count);
+ for (uint32_t i = 0; i < count; i++)
elements[i] = parent_id;
- set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
+ auto &constant = set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
+ constant.is_null_array_specialized_length = !is_literal_array_size;
}
else if (!constant_type.member_types.empty())
{
@@ -5177,7 +5231,7 @@ bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint
uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
{
- if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
+ if (type.storage == spv::StorageClassPhysicalStorageBuffer)
return 8;
else if (type.basetype == SPIRType::Struct)
{
@@ -5252,6 +5306,13 @@ bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t
break;
}
+ case OpCooperativeMatrixLoadKHR:
+ case OpCooperativeMatrixStoreKHR:
+ {
+ // TODO: Can we meaningfully deal with this?
+ break;
+ }
+
default:
break;
}
@@ -5274,6 +5335,10 @@ uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_
void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
{
+ if (analyzed_type_ids.count(type.self))
+ return;
+ analyzed_type_ids.insert(type.self);
+
for (auto &member : type.member_types)
{
auto &subtype = compiler.get<SPIRType>(member);
@@ -5407,6 +5472,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
switch (opcode)
{
case OpLoad:
+ case OpCooperativeMatrixLoadKHR:
{
if (length < 3)
return false;
@@ -5484,6 +5550,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
case OpStore:
case OpImageWrite:
case OpAtomicStore:
+ case OpCooperativeMatrixStoreKHR:
{
if (length < 1)
return false;
diff --git a/thirdparty/spirv-cross/spirv_cross.hpp b/thirdparty/spirv-cross/spirv_cross.hpp
index e9062b485c..b65b5ac77a 100644
--- a/thirdparty/spirv-cross/spirv_cross.hpp
+++ b/thirdparty/spirv-cross/spirv_cross.hpp
@@ -1054,6 +1054,7 @@ protected:
std::unordered_set<uint32_t> non_block_types;
std::unordered_map<uint32_t, PhysicalBlockMeta> physical_block_type_meta;
std::unordered_map<uint32_t, PhysicalBlockMeta *> access_chain_to_physical_block;
+ std::unordered_set<uint32_t> analyzed_type_ids;
void mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length);
PhysicalBlockMeta *find_block_meta(uint32_t id) const;
@@ -1072,6 +1073,22 @@ protected:
bool single_function);
bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
+ struct GeometryEmitDiscoveryHandler : OpcodeHandler
+ {
+ explicit GeometryEmitDiscoveryHandler(Compiler &compiler_)
+ : compiler(compiler_)
+ {
+ }
+ Compiler &compiler;
+
+ bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
+ bool begin_function_scope(const uint32_t *, uint32_t) override;
+ bool end_function_scope(const uint32_t *, uint32_t) override;
+ SmallVector<SPIRFunction *> function_stack;
+ };
+
+ void discover_geometry_emitters();
+
// Finds all resources that are written to from inside the critical section, if present.
// The critical section is delimited by OpBeginInvocationInterlockEXT and
// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written
diff --git a/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp
index b05afeb3f5..760b8037d4 100644
--- a/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp
+++ b/thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp
@@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
break;
+ case DecorationFPFastMathMode:
+ dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
+ break;
+
default:
break;
}
@@ -643,6 +647,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
return dec.index;
case DecorationFPRoundingMode:
return dec.fp_rounding_mode;
+ case DecorationFPFastMathMode:
+ return dec.fp_fast_math_mode;
default:
return 1;
}
@@ -730,6 +736,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
dec.fp_rounding_mode = FPRoundingModeMax;
break;
+ case DecorationFPFastMathMode:
+ dec.fp_fast_math_mode = FPFastMathModeMaskNone;
+ break;
+
case DecorationHlslCounterBufferGOOGLE:
{
auto &counter = meta[id].hlsl_magic_counter_buffer;
@@ -1050,16 +1060,21 @@ void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_
uint32_t parent_id = increase_bound_by(1);
make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set);
- if (!constant_type.array_size_literal.back())
- SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
+ // The array size of OpConstantNull can be either a literal or a specialization constant.
+ // In the latter case, we cannot take the value as-is, as it can be changed to anything.
+ // Rather, we assume it to be *one* for the sake of the initializer.
+ bool is_literal_array_size = constant_type.array_size_literal.back();
+ uint32_t count = is_literal_array_size ? constant_type.array.back() : 1;
- SmallVector<uint32_t> elements(constant_type.array.back());
- for (uint32_t i = 0; i < constant_type.array.back(); i++)
+ SmallVector<uint32_t> elements(count);
+ for (uint32_t i = 0; i < count; i++)
elements[i] = parent_id;
if (add_to_typed_id_set)
add_typed_id(TypeConstant, id);
- variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id;
+ auto &constant = variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false);
+ constant.self = id;
+ constant.is_null_array_specialized_length = !is_literal_array_size;
}
else if (!constant_type.member_types.empty())
{
diff --git a/thirdparty/spirv-cross/spirv_cross_util.cpp b/thirdparty/spirv-cross/spirv_cross_util.cpp
deleted file mode 100644
index 7cff010d1c..0000000000
--- a/thirdparty/spirv-cross/spirv_cross_util.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright 2015-2021 Arm Limited
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * At your option, you may choose to accept this material under either:
- * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
- * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
- */
-
-#include "spirv_cross_util.hpp"
-#include "spirv_common.hpp"
-
-using namespace spv;
-using namespace SPIRV_CROSS_NAMESPACE;
-
-namespace spirv_cross_util
-{
-void rename_interface_variable(Compiler &compiler, const SmallVector<Resource> &resources, uint32_t location,
- const std::string &name)
-{
- for (auto &v : resources)
- {
- if (!compiler.has_decoration(v.id, spv::DecorationLocation))
- continue;
-
- auto loc = compiler.get_decoration(v.id, spv::DecorationLocation);
- if (loc != location)
- continue;
-
- auto &type = compiler.get_type(v.base_type_id);
-
- // This is more of a friendly variant. If we need to rename interface variables, we might have to rename
- // structs as well and make sure all the names match up.
- if (type.basetype == SPIRType::Struct)
- {
- compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", location));
- for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
- compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i));
- }
-
- compiler.set_name(v.id, name);
- }
-}
-
-void inherit_combined_sampler_bindings(Compiler &compiler)
-{
- auto &samplers = compiler.get_combined_image_samplers();
- for (auto &s : samplers)
- {
- if (compiler.has_decoration(s.image_id, spv::DecorationDescriptorSet))
- {
- uint32_t set = compiler.get_decoration(s.image_id, spv::DecorationDescriptorSet);
- compiler.set_decoration(s.combined_id, spv::DecorationDescriptorSet, set);
- }
-
- if (compiler.has_decoration(s.image_id, spv::DecorationBinding))
- {
- uint32_t binding = compiler.get_decoration(s.image_id, spv::DecorationBinding);
- compiler.set_decoration(s.combined_id, spv::DecorationBinding, binding);
- }
- }
-}
-} // namespace spirv_cross_util
diff --git a/thirdparty/spirv-cross/spirv_cross_util.hpp b/thirdparty/spirv-cross/spirv_cross_util.hpp
deleted file mode 100644
index e6e3fcdb63..0000000000
--- a/thirdparty/spirv-cross/spirv_cross_util.hpp
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright 2015-2021 Arm Limited
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * At your option, you may choose to accept this material under either:
- * 1. The Apache License, Version 2.0, found at , or
- * 2. The MIT License, found at .
- */
-
-#ifndef SPIRV_CROSS_UTIL_HPP
-#define SPIRV_CROSS_UTIL_HPP
-
-#include "spirv_cross.hpp"
-
-namespace spirv_cross_util
-{
-void rename_interface_variable(SPIRV_CROSS_NAMESPACE::Compiler &compiler,
- const SPIRV_CROSS_NAMESPACE::SmallVector<SPIRV_CROSS_NAMESPACE::Resource> &resources,
- uint32_t location, const std::string &name);
-void inherit_combined_sampler_bindings(SPIRV_CROSS_NAMESPACE::Compiler &compiler);
-} // namespace spirv_cross_util
-
-#endif
diff --git a/thirdparty/spirv-cross/spirv_glsl.cpp b/thirdparty/spirv-cross/spirv_glsl.cpp
index 6c1d5208b9..a01cef4449 100644
--- a/thirdparty/spirv-cross/spirv_glsl.cpp
+++ b/thirdparty/spirv-cross/spirv_glsl.cpp
@@ -545,7 +545,7 @@ void CompilerGLSL::find_static_extensions()
if (options.separate_shader_objects && !options.es && options.version < 410)
require_extension_internal("GL_ARB_separate_shader_objects");
- if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
+ if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
{
if (!options.vulkan_semantics)
SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL.");
@@ -557,7 +557,7 @@ void CompilerGLSL::find_static_extensions()
}
else if (ir.addressing_model != AddressingModelLogical)
{
- SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported.");
+ SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64 addressing models are supported.");
}
// Check for nonuniform qualifier and passthrough.
@@ -631,6 +631,12 @@ void CompilerGLSL::find_static_extensions()
require_extension_internal("GL_OVR_multiview2");
}
+ if (execution.flags.get(ExecutionModeQuadDerivativesKHR) ||
+ (execution.flags.get(ExecutionModeRequireFullQuadsKHR) && get_execution_model() == ExecutionModelFragment))
+ {
+ require_extension_internal("GL_EXT_shader_quad_control");
+ }
+
// KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR.
for (auto &ext : ir.declared_extensions)
if (ext == "SPV_NV_fragment_shader_barycentric")
@@ -681,6 +687,8 @@ string CompilerGLSL::compile()
backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics;
backend.support_precise_qualifier =
(!options.es && options.version >= 400) || (options.es && options.version >= 320);
+ backend.constant_null_initializer = "{ }";
+ backend.requires_matching_array_initializer = true;
if (is_legacy_es())
backend.support_case_fallthrough = false;
@@ -700,7 +708,7 @@ string CompilerGLSL::compile()
// Shaders might cast unrelated data to pointers of non-block types.
// Find all such instances and make sure we can cast the pointers to a synthesized block type.
- if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
+ if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
analyze_non_block_pointer_types();
uint32_t pass_count = 0;
@@ -1191,6 +1199,9 @@ void CompilerGLSL::emit_header()
else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
statement("layout(depth_less) out float gl_FragDepth;");
+ if (execution.flags.get(ExecutionModeRequireFullQuadsKHR))
+ statement("layout(full_quads) in;");
+
break;
default:
@@ -1201,6 +1212,9 @@ void CompilerGLSL::emit_header()
if (cap == CapabilityRayTraversalPrimitiveCullingKHR)
statement("layout(primitive_culling);");
+ if (execution.flags.get(ExecutionModeQuadDerivativesKHR))
+ statement("layout(quad_derivatives) in;");
+
if (!inputs.empty())
statement("layout(", merge(inputs), ") in;");
if (!outputs.empty())
@@ -1515,9 +1529,12 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack
case SPIRType::Half:
case SPIRType::Short:
case SPIRType::UShort:
+ case SPIRType::BFloat16:
return 2;
case SPIRType::SByte:
case SPIRType::UByte:
+ case SPIRType::FloatE4M3:
+ case SPIRType::FloatE5M2:
return 1;
default:
@@ -1528,14 +1545,14 @@ uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPack
uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags,
BufferPackingStandard packing)
{
- // If using PhysicalStorageBufferEXT storage class, this is a pointer,
+ // If using PhysicalStorageBuffer storage class, this is a pointer,
// and is 64-bit.
if (is_physical_pointer(type))
{
if (!type.pointer)
- SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
+ SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers.");
- if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
+ if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
{
if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type))
return 16;
@@ -1543,7 +1560,7 @@ uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bits
return 8;
}
else
- SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
+ SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer.");
}
else if (is_array(type))
{
@@ -1651,17 +1668,17 @@ uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const B
uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing)
{
- // If using PhysicalStorageBufferEXT storage class, this is a pointer,
+ // If using PhysicalStorageBuffer storage class, this is a pointer,
// and is 64-bit.
if (is_physical_pointer(type))
{
if (!type.pointer)
- SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers.");
+ SPIRV_CROSS_THROW("Types in PhysicalStorageBuffer must be pointers.");
- if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
+ if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
return 8;
else
- SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT.");
+ SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64 must be used for PhysicalStorageBuffer.");
}
else if (is_array(type))
{
@@ -2841,7 +2858,7 @@ void CompilerGLSL::emit_uniform(const SPIRVariable &var)
statement(layout_for_variable(var), variable_decl(var), ";");
}
-string CompilerGLSL::constant_value_macro_name(uint32_t id)
+string CompilerGLSL::constant_value_macro_name(uint32_t id) const
{
return join("SPIRV_CROSS_CONSTANT_ID_", id);
}
@@ -3624,6 +3641,36 @@ void CompilerGLSL::emit_resources()
bool emitted = false;
+ if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
+ {
+ // Output buffer reference block forward declarations.
+ ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type)
+ {
+ if (is_physical_pointer(type))
+ {
+ bool emit_type = true;
+ if (!is_physical_pointer_to_buffer_block(type))
+ {
+ // Only forward-declare if we intend to emit it in the non_block_pointer types.
+ // Otherwise, these are just "benign" pointer types that exist as a result of access chains.
+ emit_type = std::find(physical_storage_non_block_pointer_types.begin(),
+ physical_storage_non_block_pointer_types.end(),
+ id) != physical_storage_non_block_pointer_types.end();
+ }
+
+ if (emit_type)
+ {
+ emit_buffer_reference_block(id, true);
+ emitted = true;
+ }
+ }
+ });
+ }
+
+ if (emitted)
+ statement("");
+ emitted = false;
+
// If emitted Vulkan GLSL,
// emit specialization constants as actual floats,
// spec op expressions will redirect to the constant name.
@@ -3733,30 +3780,10 @@ void CompilerGLSL::emit_resources()
emitted = false;
- if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT)
+ if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64)
{
// Output buffer reference blocks.
- // Do this in two stages, one with forward declaration,
- // and one without. Buffer reference blocks can reference themselves
- // to support things like linked lists.
- ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) {
- if (is_physical_pointer(type))
- {
- bool emit_type = true;
- if (!is_physical_pointer_to_buffer_block(type))
- {
- // Only forward-declare if we intend to emit it in the non_block_pointer types.
- // Otherwise, these are just "benign" pointer types that exist as a result of access chains.
- emit_type = std::find(physical_storage_non_block_pointer_types.begin(),
- physical_storage_non_block_pointer_types.end(),
- id) != physical_storage_non_block_pointer_types.end();
- }
-
- if (emit_type)
- emit_buffer_reference_block(id, true);
- }
- });
-
+ // Buffer reference blocks can reference themselves to support things like linked lists.
for (auto type : physical_storage_non_block_pointer_types)
emit_buffer_reference_block(type, false);
@@ -4955,12 +4982,16 @@ void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed)
// Subclasses may override to modify the return value.
string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id)
{
+ // BDA expects pointers to be passed through the function interface.
+ if (is_physical_pointer(expression_type(id)))
+ return to_pointer_expression(id);
+
// Make sure that we use the name of the original variable, and not the parameter alias.
uint32_t name_id = id;
auto *var = maybe_get<SPIRVariable>(id);
if (var && var->basevariable)
name_id = var->basevariable;
- return to_expression(name_id);
+ return to_unpacked_expression(name_id);
}
void CompilerGLSL::force_temporary_and_recompile(uint32_t id)
@@ -5391,6 +5422,15 @@ string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id)
return expr;
}
+string CompilerGLSL::to_atomic_ptr_expression(uint32_t id)
+{
+ string expr = to_non_uniform_aware_expression(id);
+ // If we have a naked pointer to POD, we need to dereference it to get the proper ".value" resolve.
+ if (should_dereference(id))
+ expr = dereference_expression(expression_type(id), expr);
+ return expr;
+}
+
string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read)
{
auto itr = invalid_expressions.find(id);
@@ -5898,6 +5938,35 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c,
{
return backend.null_pointer_literal;
}
+ else if (c.is_null_array_specialized_length && backend.requires_matching_array_initializer)
+ {
+ require_extension_internal("GL_EXT_null_initializer");
+ return backend.constant_null_initializer;
+ }
+ else if (c.replicated && type.op != spv::OpTypeArray)
+ {
+ if (type.op == spv::OpTypeMatrix)
+ {
+ uint32_t num_elements = type.columns;
+ // GLSL does not allow the replication constructor for matrices
+ // mat4(vec4(0.0)) needs to be manually expanded to mat4(vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0));
+ std::string res;
+ res += type_to_glsl(type);
+ res += "(";
+ for (uint32_t i = 0; i < num_elements; i++)
+ {
+ res += to_expression(c.subconstants[0]);
+ if (i < num_elements - 1)
+ res += ", ";
+ }
+ res += ")";
+ return res;
+ }
+ else
+ {
+ return join(type_to_glsl(type), "(", to_expression(c.subconstants[0]), ")");
+ }
+ }
else if (!c.subconstants.empty())
{
// Handles Arrays and structures.
@@ -5947,8 +6016,16 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c,
}
uint32_t subconstant_index = 0;
- for (auto &elem : c.subconstants)
+ size_t num_elements = c.subconstants.size();
+ if (c.replicated)
{
+ if (type.array.size() != 1)
+ SPIRV_CROSS_THROW("Multidimensional arrays not yet supported as replicated constans");
+ num_elements = type.array[0];
+ }
+ for (size_t i = 0; i < num_elements; i++)
+ {
+ auto &elem = c.subconstants[c.replicated ? 0 : i];
+ if (auto *op = maybe_get<SPIRConstantOp>(elem))
{
res += constant_op_expression(*op);
@@ -5979,7 +6056,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c,
}
}
- if (&elem != &c.subconstants.back())
+ if (i != num_elements - 1)
res += ", ";
subconstant_index++;
@@ -6003,7 +6080,7 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c,
else
return join(type_to_glsl(type), "(0)");
}
- else if (c.columns() == 1)
+ else if (c.columns() == 1 && type.op != spv::OpTypeCooperativeMatrixKHR)
{
auto res = constant_expression_vector(c, 0);
@@ -6053,17 +6130,44 @@ string CompilerGLSL::constant_expression(const SPIRConstant &c,
#pragma warning(disable : 4996)
#endif
+string CompilerGLSL::convert_floate4m3_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
+{
+ string res;
+ float float_value = c.scalar_floate4m3(col, row);
+
+ // There is no infinity in e4m3.
+ if (std::isnan(float_value))
+ {
+ SPIRType type { OpTypeFloat };
+ type.basetype = SPIRType::Half;
+ type.vecsize = 1;
+ type.columns = 1;
+ res = join(type_to_glsl(type), "(0.0 / 0.0)");
+ }
+ else
+ {
+ SPIRType type { OpTypeFloat };
+ type.basetype = SPIRType::FloatE4M3;
+ type.vecsize = 1;
+ type.columns = 1;
+ res = join(type_to_glsl(type), "(", format_float(float_value), ")");
+ }
+
+ return res;
+}
+
string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
string res;
- float float_value = c.scalar_f16(col, row);
+ bool is_bfloat8 = get<SPIRType>(c.constant_type).basetype == SPIRType::FloatE5M2;
+ float float_value = is_bfloat8 ? c.scalar_bf8(col, row) : c.scalar_f16(col, row);
// There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots
// of complicated workarounds, just value-cast to the half type always.
if (std::isnan(float_value) || std::isinf(float_value))
{
SPIRType type { OpTypeFloat };
- type.basetype = SPIRType::Half;
+ type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half;
type.vecsize = 1;
type.columns = 1;
@@ -6079,7 +6183,7 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col,
else
{
SPIRType type { OpTypeFloat };
- type.basetype = SPIRType::Half;
+ type.basetype = is_bfloat8 ? SPIRType::FloatE5M2 : SPIRType::Half;
type.vecsize = 1;
type.columns = 1;
res = join(type_to_glsl(type), "(", format_float(float_value), ")");
@@ -6091,7 +6195,9 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col,
string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row)
{
string res;
- float float_value = c.scalar_f32(col, row);
+
+ bool is_bfloat16 = get<SPIRType>(c.constant_type).basetype == SPIRType::BFloat16;
+ float float_value = is_bfloat16 ? c.scalar_bf16(col, row) : c.scalar_f32(col, row);
if (std::isnan(float_value) || std::isinf(float_value))
{
@@ -6155,6 +6261,9 @@ string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col
res += "f";
}
+ if (is_bfloat16)
+ res = join("bfloat16_t(", res, ")");
+
return res;
}
@@ -6311,6 +6420,29 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
switch (type.basetype)
{
+ case SPIRType::FloatE4M3:
+ if (splat || swizzle_splat)
+ {
+ res += convert_floate4m3_to_string(c, vector, 0);
+ if (swizzle_splat)
+ res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res);
+ }
+ else
+ {
+ for (uint32_t i = 0; i < c.vector_size(); i++)
+ {
+ if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0)
+ res += to_expression(c.specialization_constant_id(vector, i));
+ else
+ res += convert_floate4m3_to_string(c, vector, i);
+
+ if (i + 1 < c.vector_size())
+ res += ", ";
+ }
+ }
+ break;
+
+ case SPIRType::FloatE5M2:
case SPIRType::Half:
if (splat || swizzle_splat)
{
@@ -6333,6 +6465,7 @@ string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t
}
break;
+ case SPIRType::BFloat16:
case SPIRType::Float:
if (splat || swizzle_splat)
{
@@ -6988,9 +7121,12 @@ void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
require_extension_internal("GL_EXT_shader_atomic_float");
}
+ if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
+ require_extension_internal("GL_EXT_shader_atomic_int64");
+
forced_temporaries.insert(result_id);
emit_op(result_type, result_id,
- join(op, "(", to_non_uniform_aware_expression(op0), ", ",
+ join(op, "(", to_atomic_ptr_expression(op0), ", ",
to_unpacked_expression(op1), ")"), false);
flush_all_atomic_capable_variables();
}
@@ -9358,6 +9494,10 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative");
break;
+ case OpGroupNonUniformRotateKHR:
+ require_extension_internal("GL_KHR_shader_subgroup_rotate");
+ break;
+
case OpGroupNonUniformAll:
case OpGroupNonUniformAny:
case OpGroupNonUniformAllEqual:
@@ -9429,6 +9569,13 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
require_extension_internal("GL_KHR_shader_subgroup_quad");
break;
+ case OpGroupNonUniformQuadAllKHR:
+ case OpGroupNonUniformQuadAnyKHR:
+ // Require both extensions to be enabled.
+ require_extension_internal("GL_KHR_shader_subgroup_vote");
+ require_extension_internal("GL_EXT_shader_quad_control");
+ break;
+
default:
SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
}
@@ -9436,9 +9583,13 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
uint32_t result_type = ops[0];
uint32_t id = ops[1];
- auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
- if (scope != ScopeSubgroup)
- SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+ // These quad ops do not have a scope parameter.
+ if (op != OpGroupNonUniformQuadAllKHR && op != OpGroupNonUniformQuadAnyKHR)
+ {
+ auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2]));
+ if (scope != ScopeSubgroup)
+ SPIRV_CROSS_THROW("Only subgroup scope is supported.");
+ }
switch (op)
{
@@ -9504,6 +9655,13 @@ void CompilerGLSL::emit_subgroup_op(const Instruction &i)
emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown");
break;
+ case OpGroupNonUniformRotateKHR:
+ if (i.length > 5)
+ emit_trinary_func_op(result_type, id, ops[3], ops[4], ops[5], "subgroupClusteredRotate");
+ else
+ emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupRotate");
+ break;
+
case OpGroupNonUniformAll:
emit_unary_func_op(result_type, id, ops[3], "subgroupAll");
break;
@@ -9591,6 +9749,14 @@ case OpGroupNonUniform##op: \
break;
}
+ case OpGroupNonUniformQuadAllKHR:
+ emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAll");
+ break;
+
+ case OpGroupNonUniformQuadAnyKHR:
+ emit_unary_func_op(result_type, id, ops[2], "subgroupQuadAny");
+ break;
+
default:
SPIRV_CROSS_THROW("Invalid opcode for subgroup.");
}
@@ -9706,6 +9872,30 @@ string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
return "packUint4x16";
else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1)
return "unpackUint4x16";
+ else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::UShort)
+ return "uintBitsToBFloat16EXT";
+ else if (out_type.basetype == SPIRType::BFloat16 && in_type.basetype == SPIRType::Short)
+ return "intBitsToBFloat16EXT";
+ else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::BFloat16)
+ return "bfloat16BitsToUintEXT";
+ else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::BFloat16)
+ return "bfloat16BitsToIntEXT";
+ else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::UByte)
+ return "uintBitsToFloate4m3EXT";
+ else if (out_type.basetype == SPIRType::FloatE4M3 && in_type.basetype == SPIRType::SByte)
+ return "intBitsToFloate4m3EXT";
+ else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE4M3)
+ return "floate4m3BitsToUintEXT";
+ else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE4M3)
+ return "floate4m3BitsToIntEXT";
+ else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::UByte)
+ return "uintBitsToFloate5m2EXT";
+ else if (out_type.basetype == SPIRType::FloatE5M2 && in_type.basetype == SPIRType::SByte)
+ return "intBitsToFloate5m2EXT";
+ else if (out_type.basetype == SPIRType::UByte && in_type.basetype == SPIRType::FloatE5M2)
+ return "floate5m2BitsToUintEXT";
+ else if (out_type.basetype == SPIRType::SByte && in_type.basetype == SPIRType::FloatE5M2)
+ return "floate5m2BitsToIntEXT";
return "";
}
@@ -9824,7 +10014,17 @@ string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage)
case BuiltInInvocationId:
return "gl_InvocationID";
case BuiltInLayer:
+ {
+ auto model = get_execution_model();
+ if (model == ExecutionModelVertex || model == ExecutionModelTessellationEvaluation)
+ {
+ if (options.es)
+ require_extension_internal("GL_NV_viewport_array2");
+ else
+ require_extension_internal("GL_ARB_shader_viewport_layer_array");
+ }
return "gl_Layer";
+ }
case BuiltInViewportIndex:
return "gl_ViewportIndex";
case BuiltInTessLevelOuter:
@@ -10228,7 +10428,8 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
if (!is_ptr_chain)
mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT;
access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index);
- check_physical_type_cast(expr, type, physical_type);
+ if (check_physical_type_cast(expr, type, physical_type))
+ physical_type = 0;
};
for (uint32_t i = 0; i < count; i++)
@@ -10572,7 +10773,7 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
type = &get<SPIRType>(type_id);
}
// Vector -> Scalar
- else if (type->vecsize > 1)
+ else if (type->op == OpTypeCooperativeMatrixKHR || type->vecsize > 1)
{
string deferred_index;
if (row_major_matrix_needs_conversion)
@@ -10634,9 +10835,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
if (is_literal)
{
- bool out_of_bounds = (index >= type->vecsize);
+ bool out_of_bounds = index >= type->vecsize && type->op != OpTypeCooperativeMatrixKHR;
- if (!is_packed && !row_major_matrix_needs_conversion)
+ if (!is_packed && !row_major_matrix_needs_conversion && type->op != OpTypeCooperativeMatrixKHR)
{
expr += ".";
expr += index_to_swizzle(out_of_bounds ? 0 : index);
@@ -10736,8 +10937,9 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
return expr;
}
-void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
+bool CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t)
{
+ return false;
}
bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &)
@@ -11239,7 +11441,7 @@ bool CompilerGLSL::should_dereference(uint32_t id)
{
const auto &type = expression_type(id);
// Non-pointer expressions don't need to be dereferenced.
- if (!type.pointer)
+ if (!is_pointer(type))
return false;
// Handles shouldn't be dereferenced either.
@@ -11247,8 +11449,9 @@ bool CompilerGLSL::should_dereference(uint32_t id)
return false;
// If id is a variable but not a phi variable, we should not dereference it.
+ // BDA values passed around as parameters are always pointers.
if (auto *var = maybe_get<SPIRVariable>(id))
- return var->phi_variable;
+ return (var->parameter && is_physical_pointer(type)) || var->phi_variable;
if (auto *expr = maybe_get<SPIRExpression>(id))
{
@@ -11281,6 +11484,16 @@ bool CompilerGLSL::should_dereference(uint32_t id)
return true;
}
+bool CompilerGLSL::should_dereference_caller_param(uint32_t id)
+{
+ const auto &type = expression_type(id);
+ // BDA is always passed around as pointers.
+ if (is_physical_pointer(type))
+ return false;
+
+ return should_dereference(id);
+}
+
bool CompilerGLSL::should_forward(uint32_t id) const
{
// If id is a variable we will try to forward it regardless of force_temporary check below
@@ -11575,7 +11788,8 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32
// Can only merge swizzles for vectors.
auto &type = get<SPIRType>(return_type);
- bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1;
+ bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1 &&
+ type.op != spv::OpTypeCooperativeMatrixKHR;
bool swizzle_optimization = false;
for (uint32_t i = 0; i < length; i++)
@@ -12132,6 +12346,33 @@ CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Ins
return {};
}
+static pair<string, string> split_coopmat_pointer(const string &expr)
+{
+ auto ptr_expr = expr;
+ string index_expr;
+
+ if (ptr_expr.back() != ']')
+ SPIRV_CROSS_THROW("Access chain for coopmat must be indexed into an array.");
+
+ // Strip the access chain.
+ ptr_expr.pop_back();
+ uint32_t counter = 1;
+ while (counter && !ptr_expr.empty())
+ {
+ if (ptr_expr.back() == ']')
+ counter++;
+ else if (ptr_expr.back() == '[')
+ counter--;
+ ptr_expr.pop_back();
+ }
+
+ if (ptr_expr.empty())
+ SPIRV_CROSS_THROW("Invalid pointer expression for coopmat.");
+
+ index_expr = expr.substr(ptr_expr.size() + 1, expr.size() - (ptr_expr.size() + 1) - 1);
+ return { std::move(ptr_expr), std::move(index_expr) };
+}
+
void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
auto ops = stream(instruction);
@@ -12675,6 +12916,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
if (composite_type_is_complex)
allow_base_expression = false;
+ if (composite_type.op == spv::OpTypeCooperativeMatrixKHR)
+ allow_base_expression = false;
+
// Packed expressions or physical ID mapped expressions cannot be split up.
if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) ||
has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID))
@@ -13618,13 +13862,42 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
}
+ case OpCooperativeMatrixConvertNV:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("CooperativeMatrixConvertNV requires vulkan semantics.");
+ require_extension_internal("GL_NV_cooperative_matrix2");
+ // fallthrough
case OpFConvert:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
- auto func = type_to_glsl_constructor(get<SPIRType>(result_type));
- emit_unary_func_op(result_type, id, ops[2], func.c_str());
+ auto &type = get<SPIRType>(result_type);
+
+ if (type.op == OpTypeCooperativeMatrixKHR && opcode == OpFConvert)
+ {
+ auto &expr_type = expression_type(ops[2]);
+ if (get<SPIRConstant>(type.cooperative.use_id).scalar() !=
+ get<SPIRConstant>(expr_type.cooperative.use_id).scalar())
+ {
+ // Somewhat questionable with spec constant uses.
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("NV_cooperative_matrix2 requires vulkan semantics.");
+ require_extension_internal("GL_NV_cooperative_matrix2");
+ }
+ }
+
+ if ((type.basetype == SPIRType::FloatE4M3 || type.basetype == SPIRType::FloatE5M2) &&
+ has_decoration(id, spv::DecorationSaturatedToLargestFloat8NormalConversionEXT))
+ {
+ emit_uninitialized_temporary_expression(result_type, id);
+ statement("saturatedConvertEXT(", to_expression(id), ", ", to_unpacked_expression(ops[2]), ");");
+ }
+ else
+ {
+ auto func = type_to_glsl_constructor(type);
+ emit_unary_func_op(result_type, id, ops[2], func.c_str());
+ }
break;
}
@@ -13843,8 +14116,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
const char *increment = unsigned_type ? "0u" : "0";
emit_op(ops[0], ops[1],
join(op, "(",
- to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
+ to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false);
flush_all_atomic_capable_variables();
+
+ if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
+ require_extension_internal("GL_EXT_shader_atomic_int64");
break;
}
@@ -13856,8 +14132,12 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
// Ignore semantics for now, probably only relevant to CL.
uint32_t val = ops[3];
const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange";
- statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");");
+ statement(op, "(", to_atomic_ptr_expression(ptr), ", ", to_expression(val), ");");
flush_all_atomic_capable_variables();
+
+ auto &type = expression_type(ptr);
+ if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
+ require_extension_internal("GL_EXT_shader_atomic_int64");
break;
}
@@ -13892,7 +14172,10 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
increment = "-1";
emit_op(ops[0], ops[1],
- join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false);
+ join(op, "(", to_atomic_ptr_expression(ops[2]), ", ", increment, ")"), false);
+
+ if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
+ require_extension_internal("GL_EXT_shader_atomic_int64");
}
flush_all_atomic_capable_variables();
@@ -13911,9 +14194,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd";
forced_temporaries.insert(ops[1]);
- auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
+ auto expr = join(op, "(", to_atomic_ptr_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")");
emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5]));
flush_all_atomic_capable_variables();
+
+ auto &type = get<SPIRType>(ops[0]);
+ if (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)
+ require_extension_internal("GL_EXT_shader_atomic_int64");
break;
}
@@ -14717,6 +15004,20 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
}
+ case OpExtInstWithForwardRefsKHR:
+ {
+ uint32_t extension_set = ops[2];
+ auto ext = get<SPIRExtension>(extension_set).ext;
+ if (ext != SPIRExtension::SPV_debug_info &&
+ ext != SPIRExtension::NonSemanticShaderDebugInfo &&
+ ext != SPIRExtension::NonSemanticGeneric)
+ {
+ SPIRV_CROSS_THROW("Unexpected use of ExtInstWithForwardRefsKHR.");
+ }
+
+ break;
+ }
+
case OpExtInst:
{
uint32_t extension_set = ops[2];
@@ -14757,7 +15058,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n");
require_extension_internal("GL_EXT_debug_printf");
auto &format_string = get<SPIRString>(ops[4]).str;
- string expr = join("debugPrintfEXT(\"", format_string, "\"");
+ string expr = join(backend.printf_function, "(\"", format_string, "\"");
for (uint32_t i = 5; i < length; i++)
{
expr += ", ";
@@ -14956,6 +15257,9 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpGroupNonUniformLogicalXor:
case OpGroupNonUniformQuadSwap:
case OpGroupNonUniformQuadBroadcast:
+ case OpGroupNonUniformQuadAllKHR:
+ case OpGroupNonUniformQuadAnyKHR:
+ case OpGroupNonUniformRotateKHR:
emit_subgroup_op(instruction);
break;
@@ -15175,8 +15479,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
case OpConvertUToPtr:
{
auto &type = get<SPIRType>(ops[0]);
- if (type.storage != StorageClassPhysicalStorageBufferEXT)
- SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr.");
+ if (type.storage != StorageClassPhysicalStorageBuffer)
+ SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertUToPtr.");
auto &in_type = expression_type(ops[2]);
if (in_type.vecsize == 2)
@@ -15191,8 +15495,8 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
{
auto &type = get<SPIRType>(ops[0]);
auto &ptr_type = expression_type(ops[2]);
- if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT)
- SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU.");
+ if (ptr_type.storage != StorageClassPhysicalStorageBuffer)
+ SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBuffer is supported by OpConvertPtrToU.");
if (type.vecsize == 2)
require_extension_internal("GL_EXT_buffer_reference_uvec2");
@@ -15291,6 +15595,169 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
break;
}
+ case OpCooperativeMatrixLengthKHR:
+ {
+ // Need to synthesize a dummy temporary, since the SPIR-V opcode is based on the type.
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ set<SPIRExpression>(
+ id, join(type_to_glsl(get<SPIRType>(result_type)),
+ "(", type_to_glsl(get<SPIRType>(ops[2])), "(0).length())"),
+ result_type, true);
+ break;
+ }
+
+ case OpCooperativeMatrixLoadKHR:
+ {
+ // The spec contradicts itself on whether the stride is optional.
+ if (length < 5)
+ SPIRV_CROSS_THROW("Stride is not provided.");
+
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ emit_uninitialized_temporary_expression(result_type, id);
+
+ auto expr = to_expression(ops[2]);
+ pair<string, string> split_expr;
+ if (!is_forcing_recompilation())
+ split_expr = split_coopmat_pointer(expr);
+
+ string layout_expr;
+ if (const auto *layout = maybe_get<SPIRConstant>(ops[3]))
+ {
+ if (!layout->specialization)
+ {
+ if (layout->scalar() == spv::CooperativeMatrixLayoutColumnMajorKHR)
+ layout_expr = "gl_CooperativeMatrixLayoutColumnMajor";
+ else
+ layout_expr = "gl_CooperativeMatrixLayoutRowMajor";
+ }
+ }
+
+ if (layout_expr.empty())
+ layout_expr = join("int(", to_expression(ops[3]), ")");
+
+ statement("coopMatLoad(",
+ to_expression(id), ", ",
+ split_expr.first, ", ",
+ split_expr.second, ", ",
+ to_expression(ops[4]), ", ",
+ layout_expr, ");");
+
+ register_read(id, ops[2], false);
+ break;
+ }
+
+ case OpCooperativeMatrixStoreKHR:
+ {
+ // The spec contradicts itself on whether the stride is optional.
+ if (length < 4)
+ SPIRV_CROSS_THROW("Stride is not provided.");
+
+ // SPIR-V and GLSL disagree on how to pass the expression.
+ // In SPIR-V it's a pointer, but in GLSL it's a reference to an array plus an index.
+
+ auto expr = to_expression(ops[0]);
+ pair<string, string> split_expr;
+ if (!is_forcing_recompilation())
+ split_expr = split_coopmat_pointer(expr);
+
+ string layout_expr;
+ if (const auto *layout = maybe_get<SPIRConstant>(ops[2]))
+ {
+ if (!layout->specialization)
+ {
+ if (layout->scalar() == spv::CooperativeMatrixLayoutColumnMajorKHR)
+ layout_expr = "gl_CooperativeMatrixLayoutColumnMajor";
+ else
+ layout_expr = "gl_CooperativeMatrixLayoutRowMajor";
+ }
+ }
+
+ if (layout_expr.empty())
+ layout_expr = join("int(", to_expression(ops[2]), ")");
+
+ statement("coopMatStore(",
+ to_expression(ops[1]), ", ",
+ split_expr.first, ", ",
+ split_expr.second, ", ",
+ to_expression(ops[3]), ", ",
+ layout_expr, ");");
+
+ // TODO: Do we care about memory operands?
+
+ register_write(ops[0]);
+ break;
+ }
+
+ case OpCooperativeMatrixMulAddKHR:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+ uint32_t A = ops[2];
+ uint32_t B = ops[3];
+ uint32_t C = ops[4];
+ bool forward = should_forward(A) && should_forward(B) && should_forward(C);
+ emit_op(result_type, id,
+ join("coopMatMulAdd(",
+ to_unpacked_expression(A), ", ",
+ to_unpacked_expression(B), ", ",
+ to_unpacked_expression(C), ", ",
+ (length >= 6 ? ops[5] : 0),
+ ")"),
+ forward);
+
+ inherit_expression_dependencies(id, A);
+ inherit_expression_dependencies(id, B);
+ inherit_expression_dependencies(id, C);
+ break;
+ }
+
+ case OpCompositeConstructReplicateEXT:
+ {
+ uint32_t result_type = ops[0];
+ uint32_t id = ops[1];
+
+ auto &type = get<SPIRType>(result_type);
+ auto value_to_replicate = to_expression(ops[2]);
+ std::string rhs;
+ // Matrices don't have a replicating constructor for vectors, so replicate manually.
+ if (type.op == spv::OpTypeMatrix || type.op == spv::OpTypeArray)
+ {
+ if (type.op == spv::OpTypeArray && type.array.size() != 1)
+ {
+ SPIRV_CROSS_THROW(
+ "Multi-dimensional arrays are currently not supported for OpCompositeConstructReplicateEXT.");
+ }
+ uint32_t num_elements = type.op == spv::OpTypeMatrix ? type.columns : type.array[0];
+ if (backend.use_initializer_list && type.op == spv::OpTypeArray)
+ {
+ rhs += "{";
+ }
+ else
+ {
+ rhs += type_to_glsl_constructor(type);
+ rhs += "(";
+ }
+ for (uint32_t i = 0; i < num_elements; i++)
+ {
+ rhs += value_to_replicate;
+ if (i < num_elements - 1)
+ rhs += ", ";
+ }
+ if (backend.use_initializer_list && type.op == spv::OpTypeArray)
+ rhs += "}";
+ else
+ rhs += ")";
+ }
+ else
+ {
+ rhs = join(type_to_glsl(type), "(", to_expression(ops[2]), ")");
+ }
+ emit_op(result_type, id, rhs, true);
+ break;
+ }
+
default:
statement("// unimplemented op ", instruction.op);
break;
@@ -15689,7 +16156,10 @@ string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg)
auto &type = expression_type(arg.id);
const char *direction = "";
- if (type.pointer)
+ if (is_pointer(type) &&
+ (type.storage == StorageClassFunction ||
+ type.storage == StorageClassPrivate ||
+ type.storage == StorageClassOutput))
{
// If we're passing around block types to function, we really mean reference in a pointer sense,
// but DXC does not like inout for mesh blocks, so workaround that. out is technically not correct,
@@ -15763,13 +16233,24 @@ string CompilerGLSL::variable_decl(const SPIRVariable &variable)
else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
}
- else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
+ else if (variable.initializer)
{
- uint32_t expr = variable.initializer;
- if (ir.ids[expr].get_type() != TypeUndef)
- res += join(" = ", to_initializer_expression(variable));
- else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
- res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
+ if (!variable_decl_is_remapped_storage(variable, StorageClassWorkgroup))
+ {
+ uint32_t expr = variable.initializer;
+ if (ir.ids[expr].get_type() != TypeUndef)
+ res += join(" = ", to_initializer_expression(variable));
+ else if (options.force_zero_initialized_variables && type_can_zero_initialize(type))
+ res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable)));
+ }
+ else
+ {
+ // Workgroup memory requires special handling: it can only be null-initialized.
+ // GLSL handles this with a null initializer, while other targets require more work after the declaration.
+ require_extension_internal("GL_EXT_null_initializer");
+ if (!backend.constant_null_initializer.empty())
+ res += join(" = ", backend.constant_null_initializer);
+ }
}
return res;
@@ -15849,7 +16330,7 @@ string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index)
string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t)
{
- if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct)
+ if (type.pointer && type.storage == StorageClassPhysicalStorageBuffer && type.basetype != SPIRType::Struct)
{
// We are using a wrapped pointer type, and we should not emit any array declarations here.
return "";
@@ -16124,6 +16605,61 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
require_extension_internal("GL_ARB_shader_atomic_counters");
}
+ const SPIRType *coop_type = &type;
+ while (is_pointer(*coop_type) || is_array(*coop_type))
+ coop_type = &get<SPIRType>(coop_type->parent_type);
+
+ if (coop_type->op == spv::OpTypeCooperativeMatrixKHR)
+ {
+ require_extension_internal("GL_KHR_cooperative_matrix");
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("Cooperative matrix only available in Vulkan.");
+ // GLSL doesn't support this as a spec constant, which makes sense ...
+ uint32_t use_type = get<SPIRConstant>(coop_type->cooperative.use_id).scalar();
+
+ const char *use = nullptr;
+ switch (use_type)
+ {
+ case CooperativeMatrixUseMatrixAKHR:
+ use = "gl_MatrixUseA";
+ break;
+
+ case CooperativeMatrixUseMatrixBKHR:
+ use = "gl_MatrixUseB";
+ break;
+
+ case CooperativeMatrixUseMatrixAccumulatorKHR:
+ use = "gl_MatrixUseAccumulator";
+ break;
+
+ default:
+ SPIRV_CROSS_THROW("Invalid matrix use.");
+ }
+
+ string scope_expr;
+ if (const auto *scope = maybe_get<SPIRConstant>(coop_type->cooperative.scope_id))
+ {
+ if (!scope->specialization)
+ {
+ require_extension_internal("GL_KHR_memory_scope_semantics");
+ if (scope->scalar() == spv::ScopeSubgroup)
+ scope_expr = "gl_ScopeSubgroup";
+ else if (scope->scalar() == spv::ScopeWorkgroup)
+ scope_expr = "gl_ScopeWorkgroup";
+ else
+ SPIRV_CROSS_THROW("Invalid scope for cooperative matrix.");
+ }
+ }
+
+ if (scope_expr.empty())
+ scope_expr = to_expression(coop_type->cooperative.scope_id);
+
+ return join("coopmat<", type_to_glsl(get(coop_type->parent_type)), ", ",
+ scope_expr, ", ",
+ to_expression(coop_type->cooperative.rows_id), ", ",
+ to_expression(coop_type->cooperative.columns_id), ", ", use, ">");
+ }
+
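
As an illustration only: for a hypothetical float16 16x16 "A" matrix at subgroup scope, the join(...) above assembles a GL_KHR_cooperative_matrix type spelled as printed by this sketch.

    #include <cstdio>

    int main()
    {
        std::printf("coopmat<%s, %s, %d, %d, %s>\n",
                    "float16_t", "gl_ScopeSubgroup", 16, 16, "gl_MatrixUseA");
        return 0;
    }
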
if (type.vecsize == 1 && type.columns == 1) // Scalar builtin
{
switch (type.basetype)
@@ -16146,6 +16682,21 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
return "atomic_uint";
case SPIRType::Half:
return "float16_t";
+ case SPIRType::BFloat16:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics.");
+ require_extension_internal("GL_EXT_bfloat16");
+ return "bfloat16_t";
+ case SPIRType::FloatE4M3:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics.");
+ require_extension_internal("GL_EXT_float_e4m3");
+ return "floate4m3_t";
+ case SPIRType::FloatE5M2:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics.");
+ require_extension_internal("GL_EXT_float_e5m2");
+ return "floate5m2_t";
case SPIRType::Float:
return "float";
case SPIRType::Double:
@@ -16178,6 +16729,21 @@ string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id)
return join("uvec", type.vecsize);
case SPIRType::Half:
return join("f16vec", type.vecsize);
+ case SPIRType::BFloat16:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("bfloat16 requires Vulkan semantics.");
+ require_extension_internal("GL_EXT_bfloat16");
+ return join("bf16vec", type.vecsize);
+ case SPIRType::FloatE4M3:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("floate4m3_t requires Vulkan semantics.");
+ require_extension_internal("GL_EXT_float_e4m3");
+ return join("fe4m3vec", type.vecsize);
+ case SPIRType::FloatE5M2:
+ if (!options.vulkan_semantics)
+ SPIRV_CROSS_THROW("floate5m2_t requires Vulkan semantics.");
+ require_extension_internal("GL_EXT_float_e5m2");
+ return join("fe5m2vec", type.vecsize);
case SPIRType::Float:
return join("vec", type.vecsize);
case SPIRType::Double:
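
A compact cheat sheet of the pairings introduced by the scalar and vector cases above (spellings taken from the named GL_EXT_* extensions; vector names follow the existing f16vec pattern):

    // Sketch for reference only; covers just the cases shown above.
    const char *bf16_scalar = "bfloat16_t";  // GL_EXT_bfloat16
    const char *bf16_vec3   = "bf16vec3";
    const char *e4m3_scalar = "floate4m3_t"; // GL_EXT_float_e4m3
    const char *e4m3_vec2   = "fe4m3vec2";
    const char *e5m2_scalar = "floate5m2_t"; // GL_EXT_float_e5m2
    const char *e5m2_vec4   = "fe5m2vec4";
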
@@ -16353,6 +16919,11 @@ void CompilerGLSL::add_function_overload(const SPIRFunction &func)
// but that will not change the signature in GLSL/HLSL,
// so strip the pointer type before hashing.
uint32_t type_id = get_pointee_type_id(arg.type);
+
+ // Workaround glslang bug. It seems to only consider the base type when resolving overloads.
+ if (get<SPIRType>(type_id).op == spv::OpTypeCooperativeMatrixKHR)
+ type_id = get<SPIRType>(type_id).parent_type;
+
auto &type = get<SPIRType>(type_id);
if (!combined_image_samplers.empty())
@@ -16492,6 +17063,7 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
{
// Recursively emit functions which are called.
uint32_t id = ops[2];
+
emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags);
}
}
@@ -16530,6 +17102,12 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
// Comes from MSL which can push global variables as local variables in main function.
add_local_variable_name(var.self);
statement(variable_decl(var), ";");
+
+ // "Real" workgroup variables in compute shaders needs extra caretaking.
+ // They need to be initialized with an extra routine as they come in arbitrary form.
+ if (var.storage == StorageClassWorkgroup && var.initializer)
+ emit_workgroup_initialization(var);
+
var.deferred_declaration = false;
}
else if (var.storage == StorageClassPrivate)
@@ -16636,6 +17214,10 @@ void CompilerGLSL::emit_fixup()
}
}
+void CompilerGLSL::emit_workgroup_initialization(const SPIRVariable &)
+{
+}
+
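
The base-class hook above is deliberately empty; a backend override emits the actual initialization. A hypothetical sketch of the statements such an override might produce (an assumption for illustration, not the shipped implementation), with each puts() standing in for a statement() call:

    #include <cstdio>

    int main()
    {
        std::puts("if (gl_LocalInvocationIndex == 0)");
        std::puts("    my_shared_var = {};"); // one invocation writes the null value
        std::puts("threadgroup_barrier(mem_flags::mem_threadgroup);");
        return 0;
    }
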
void CompilerGLSL::flush_phi(BlockID from, BlockID to)
{
auto &child = get<SPIRBlock>(to);
@@ -17869,6 +18451,14 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block)
case SPIRBlock::Unreachable:
{
+ // If the entry point ends with unreachable and has a return value, insert a return
+ // statement to avoid potential compiler errors from non-void functions without a return value.
+ if (block.return_value)
+ {
+ statement("return ", to_unpacked_expression(block.return_value), ";");
+ break;
+ }
+
// Avoid emitting false fallthrough, which can happen for
// if (cond) break; else discard; inside a case label.
// Discard is not always implementable as a terminator.
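
The motivation is easiest to see on the emitted side; a C++ analogue of the problem (illustrative, not emitted text): a non-void function whose SPIR-V terminator is OpUnreachable would otherwise be emitted with no trailing return, which strict C-family front ends reject.

    int entry_point_analogue(bool cond)
    {
        if (cond)
            return 1;
        // SPIR-V declared this point unreachable; emit a return anyway so the
        // generated source still satisfies "non-void function returns a value".
        return 0;
    }
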
diff --git a/thirdparty/spirv-cross/spirv_glsl.hpp b/thirdparty/spirv-cross/spirv_glsl.hpp
index 8a00263234..03ff330ccf 100644
--- a/thirdparty/spirv-cross/spirv_glsl.hpp
+++ b/thirdparty/spirv-cross/spirv_glsl.hpp
@@ -297,6 +297,9 @@ public:
float_formatter = formatter;
}
+ // Returns the macro name corresponding to the given constant ID.
+ std::string constant_value_macro_name(uint32_t id) const;
+
protected:
struct ShaderSubgroupSupportHelper
{
@@ -450,6 +453,7 @@ protected:
virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0);
virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const;
virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id);
+ virtual void emit_workgroup_initialization(const SPIRVariable &var);
struct TextureFunctionBaseArguments
{
@@ -622,6 +626,8 @@ protected:
const char *uint16_t_literal_suffix = "us";
const char *nonuniform_qualifier = "nonuniformEXT";
const char *boolean_mix_function = "mix";
+ const char *printf_function = "debugPrintfEXT";
+ std::string constant_null_initializer = "";
SPIRType::BaseType boolean_in_struct_remapped_type = SPIRType::Boolean;
bool swizzle_is_function = false;
bool shared_is_implied = false;
@@ -629,6 +635,7 @@ protected:
bool explicit_struct_type = false;
bool use_initializer_list = false;
bool use_typed_initializer_list = false;
+ bool requires_matching_array_initializer = false;
bool can_declare_struct_inline = true;
bool can_declare_arrays_inline = true;
bool native_row_major_matrix = true;
@@ -679,7 +686,6 @@ protected:
const SmallVector<uint32_t> &indices);
void emit_block_chain(SPIRBlock &block);
void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
- std::string constant_value_macro_name(uint32_t id);
int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
void emit_constant(const SPIRConstant &constant);
void emit_specialization_constant_op(const SPIRConstantOp &constant);
@@ -695,6 +701,7 @@ protected:
void emit_variable_temporary_copies(const SPIRVariable &var);
bool should_dereference(uint32_t id);
+ bool should_dereference_caller_param(uint32_t id);
bool should_forward(uint32_t id) const;
bool should_suppress_usage_tracking(uint32_t id) const;
void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
@@ -762,7 +769,7 @@ protected:
spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
- virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type);
+ virtual bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type);
virtual bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
spv::StorageClass storage, bool &is_packed);
@@ -792,8 +799,9 @@ protected:
std::string declare_temporary(uint32_t type, uint32_t id);
void emit_uninitialized_temporary(uint32_t type, uint32_t id);
SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
- void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
+ virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
std::string to_non_uniform_aware_expression(uint32_t id);
+ std::string to_atomic_ptr_expression(uint32_t id);
std::string to_expression(uint32_t id, bool register_expression_read = true);
std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -1009,6 +1017,8 @@ protected:
const Instruction *get_next_instruction_in_block(const Instruction &instr);
static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
+ std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
+ std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
diff --git a/thirdparty/spirv-cross/spirv_msl.cpp b/thirdparty/spirv-cross/spirv_msl.cpp
index 642fcfa59a..1c4f1ed13a 100644
--- a/thirdparty/spirv-cross/spirv_msl.cpp
+++ b/thirdparty/spirv-cross/spirv_msl.cpp
@@ -272,16 +272,22 @@ void CompilerMSL::build_implicit_builtins()
(active_input_builtins.get(BuiltInVertexId) || active_input_builtins.get(BuiltInVertexIndex) ||
active_input_builtins.get(BuiltInBaseVertex) || active_input_builtins.get(BuiltInInstanceId) ||
active_input_builtins.get(BuiltInInstanceIndex) || active_input_builtins.get(BuiltInBaseInstance));
- bool need_local_invocation_index = (msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId)) || is_mesh_shader();
+ bool need_local_invocation_index =
+ (msl_options.emulate_subgroups && active_input_builtins.get(BuiltInSubgroupId)) || is_mesh_shader() ||
+ needs_workgroup_zero_init || needs_local_invocation_index;
bool need_workgroup_size = msl_options.emulate_subgroups && active_input_builtins.get(BuiltInNumSubgroups);
bool force_frag_depth_passthrough =
get_execution_model() == ExecutionModelFragment && !uses_explicit_early_fragment_test() && need_subpass_input &&
msl_options.enable_frag_depth_builtin && msl_options.input_attachment_is_ds_attachment;
+ bool need_point_size =
+ msl_options.enable_point_size_builtin && msl_options.enable_point_size_default &&
+ get_execution_model() == ExecutionModelVertex;
if (need_subpass_input || need_sample_pos || need_subgroup_mask || need_vertex_params || need_tesc_params ||
need_tese_params || need_multiview || need_dispatch_base || need_vertex_base_params || need_grid_params ||
needs_sample_id || needs_subgroup_invocation_id || needs_subgroup_size || needs_helper_invocation ||
- has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size || force_frag_depth_passthrough || is_mesh_shader())
+ has_additional_fixed_sample_mask() || need_local_invocation_index || need_workgroup_size ||
+ force_frag_depth_passthrough || need_point_size || is_mesh_shader())
{
bool has_frag_coord = false;
bool has_sample_id = false;
@@ -299,6 +305,7 @@ void CompilerMSL::build_implicit_builtins()
bool has_local_invocation_index = false;
bool has_workgroup_size = false;
bool has_frag_depth = false;
+ bool has_point_size = false;
uint32_t workgroup_id_type = 0;
ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) {
@@ -306,6 +313,22 @@ void CompilerMSL::build_implicit_builtins()
return;
if (!interface_variable_exists_in_entry_point(var.self))
return;
+
+ auto &type = this->get<SPIRType>(var.basetype);
+ if (need_point_size && has_decoration(type.self, DecorationBlock))
+ {
+ const auto member_count = static_cast<uint32_t>(type.member_types.size());
+ for (uint32_t i = 0; i < member_count; i++)
+ {
+ if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInPointSize)
+ {
+ has_point_size = true;
+ active_output_builtins.set(BuiltInPointSize);
+ break;
+ }
+ }
+ }
+
if (!has_decoration(var.self, DecorationBuiltIn))
return;
@@ -328,6 +351,12 @@ void CompilerMSL::build_implicit_builtins()
}
}
+ if (builtin == BuiltInPointSize)
+ {
+ has_point_size = true;
+ active_output_builtins.set(BuiltInPointSize);
+ }
+
if (builtin == BuiltInPrimitivePointIndicesEXT ||
builtin == BuiltInPrimitiveLineIndicesEXT ||
builtin == BuiltInPrimitiveTriangleIndicesEXT)
@@ -481,7 +510,7 @@ void CompilerMSL::build_implicit_builtins()
has_local_invocation_index = true;
}
- if (need_workgroup_size && builtin == BuiltInLocalInvocationId)
+ if (need_workgroup_size && builtin == BuiltInWorkgroupSize)
{
builtin_workgroup_size_id = var.self;
mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var.self);
@@ -903,25 +932,55 @@ void CompilerMSL::build_implicit_builtins()
if (need_workgroup_size && !has_workgroup_size)
{
- uint32_t offset = ir.increase_bound_by(2);
- uint32_t type_ptr_id = offset;
- uint32_t var_id = offset + 1;
+ auto &execution = get_entry_point();
+ // First, check whether the workgroup size _constant_ was defined.
+ // If it was, we don't need to do anything; in fact, we shouldn't.
+ builtin_workgroup_size_id = execution.workgroup_size.constant;
+ if (builtin_workgroup_size_id == 0)
+ {
+ uint32_t var_id = ir.increase_bound_by(1);
- // Create gl_WorkgroupSize.
- uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
- SPIRType uint_type_ptr = get<SPIRType>(type_id);
- uint_type_ptr.op = OpTypePointer;
- uint_type_ptr.pointer = true;
- uint_type_ptr.pointer_depth++;
- uint_type_ptr.parent_type = type_id;
- uint_type_ptr.storage = StorageClassInput;
+ // Create gl_WorkgroupSize.
+ uint32_t type_id = build_extended_vector_type(get_uint_type_id(), 3);
+ // If we have LocalSize or LocalSizeId, use those to define the workgroup size.
+ if (execution.flags.get(ExecutionModeLocalSizeId))
+ {
+ const SPIRConstant *init[] = { &get<SPIRConstant>(execution.workgroup_size.id_x),
+ &get<SPIRConstant>(execution.workgroup_size.id_y),
+ &get<SPIRConstant>(execution.workgroup_size.id_z) };
+ bool specialized = init[0]->specialization || init[1]->specialization || init[2]->specialization;
+ set<SPIRConstant>(var_id, type_id, init, 3, specialized);
+ execution.workgroup_size.constant = var_id;
+ }
+ else if (execution.flags.get(ExecutionModeLocalSize))
+ {
+ uint32_t offset = ir.increase_bound_by(3);
+ const SPIRConstant *init[] = {
+ &set<SPIRConstant>(offset, get_uint_type_id(), execution.workgroup_size.x, false),
+ &set<SPIRConstant>(offset + 1, get_uint_type_id(), execution.workgroup_size.y, false),
+ &set<SPIRConstant>(offset + 2, get_uint_type_id(), execution.workgroup_size.z, false)
+ };
+ set<SPIRConstant>(var_id, type_id, init, 3, false);
+ execution.workgroup_size.constant = var_id;
+ }
+ else
+ {
+ uint32_t type_ptr_id = ir.increase_bound_by(1);
+ SPIRType uint_type_ptr = get<SPIRType>(type_id);
+ uint_type_ptr.op = OpTypePointer;
+ uint_type_ptr.pointer = true;
+ uint_type_ptr.pointer_depth++;
+ uint_type_ptr.parent_type = type_id;
+ uint_type_ptr.storage = StorageClassInput;
- auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
- ptr_type.self = type_id;
- set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
- set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
- builtin_workgroup_size_id = var_id;
- mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
+ auto &ptr_type = set<SPIRType>(type_ptr_id, uint_type_ptr);
+ ptr_type.self = type_id;
+ set<SPIRVariable>(var_id, type_ptr_id, StorageClassInput);
+ mark_implicit_builtin(StorageClassInput, BuiltInWorkgroupSize, var_id);
+ }
+ set_decoration(var_id, DecorationBuiltIn, BuiltInWorkgroupSize);
+ builtin_workgroup_size_id = var_id;
+ }
}
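
Condensed, the new fallback order is: reuse the shader's own WorkgroupSize constant, else synthesize one from LocalSizeId or LocalSize, else fall back to the input builtin variable. A sketch of that priority (types simplified, names hypothetical):

    #include <cstdint>

    enum class WgSizeSource { ExistingConstant, FromLocalSizeId, FromLocalSize, InputBuiltin };

    WgSizeSource pick_workgroup_size_source(uint32_t existing_constant_id,
                                            bool has_local_size_id, bool has_local_size)
    {
        if (existing_constant_id != 0)
            return WgSizeSource::ExistingConstant; // shader already defines it
        if (has_local_size_id)
            return WgSizeSource::FromLocalSizeId;  // may involve spec constants
        if (has_local_size)
            return WgSizeSource::FromLocalSize;    // literal x/y/z values
        return WgSizeSource::InputBuiltin;         // last resort: builtin input variable
    }
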
if (!has_frag_depth && force_frag_depth_passthrough)
@@ -953,6 +1012,34 @@ void CompilerMSL::build_implicit_builtins()
mark_implicit_builtin(StorageClassOutput, BuiltInFragDepth, var_id);
active_output_builtins.set(BuiltInFragDepth);
}
+
+ if (!has_point_size && need_point_size)
+ {
+ uint32_t offset = ir.increase_bound_by(3);
+ uint32_t type_id = offset;
+ uint32_t type_ptr_id = offset + 1;
+ uint32_t var_id = offset + 2;
+
+ // Create gl_PointSize
+ SPIRType float_type { OpTypeFloat };
+ float_type.basetype = SPIRType::Float;
+ float_type.width = 32;
+ float_type.vecsize = 1;
+ set<SPIRType>(type_id, float_type);
+
+ SPIRType float_type_ptr_in = float_type;
+ float_type_ptr_in.op = spv::OpTypePointer;
+ float_type_ptr_in.pointer = true;
+ float_type_ptr_in.pointer_depth++;
+ float_type_ptr_in.parent_type = type_id;
+ float_type_ptr_in.storage = StorageClassOutput;
+
+ auto &ptr_in_type = set<SPIRType>(type_ptr_id, float_type_ptr_in);
+ ptr_in_type.self = type_id;
+ set<SPIRVariable>(var_id, type_ptr_id, StorageClassOutput);
+ set_decoration(var_id, DecorationBuiltIn, BuiltInPointSize);
+ mark_implicit_builtin(StorageClassOutput, BuiltInPointSize, var_id);
+ }
}
if (needs_swizzle_buffer_def)
@@ -1003,6 +1090,7 @@ void CompilerMSL::build_implicit_builtins()
// If we're returning a struct from a vertex-like entry point, we must return a position attribute.
bool need_position = (get_execution_model() == ExecutionModelVertex || is_tese_shader()) &&
!capture_output_to_buffer && !get_is_rasterization_disabled() &&
+ !msl_options.auto_disable_rasterization &&
!active_output_builtins.get(BuiltInPosition);
if (need_position)
@@ -1039,6 +1127,10 @@ void CompilerMSL::build_implicit_builtins()
});
need_position = has_output && !active_output_builtins.get(BuiltInPosition);
}
+ else if (!active_output_builtins.get(BuiltInPosition) && msl_options.auto_disable_rasterization)
+ {
+ is_rasterization_disabled = true;
+ }
if (need_position)
{
@@ -1593,6 +1685,7 @@ string CompilerMSL::compile()
backend.basic_int16_type = "short";
backend.basic_uint16_type = "ushort";
backend.boolean_mix_function = "select";
+ backend.printf_function = "os_log_default.log";
backend.swizzle_is_function = false;
backend.shared_is_implied = false;
backend.use_initializer_list = true;
@@ -1606,7 +1699,7 @@ string CompilerMSL::compile()
backend.nonuniform_qualifier = "";
backend.support_small_type_sampling_result = true;
backend.force_merged_mesh_block = false;
- backend.force_gl_in_out_block = get_execution_model() == ExecutionModelMeshEXT;
+ backend.force_gl_in_out_block = false;
backend.supports_empty_struct = true;
backend.support_64bit_switch = true;
backend.boolean_in_struct_remapped_type = SPIRType::Short;
@@ -1644,6 +1737,7 @@ string CompilerMSL::compile()
analyze_image_and_sampler_usage();
analyze_sampled_image_usage();
analyze_interlocked_resource_usage();
+ analyze_workgroup_variables();
preprocess_op_codes();
build_implicit_builtins();
@@ -1777,7 +1871,7 @@ void CompilerMSL::preprocess_op_codes()
if (preproc.uses_atomics)
{
add_header_line("#include ");
- add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"");
+ add_pragma_line("#pragma clang diagnostic ignored \"-Wunused-variable\"", false);
}
// Before MSL 2.1 (2.2 for textures), Metal vertex functions that write to
@@ -1794,6 +1888,8 @@ void CompilerMSL::preprocess_op_codes()
capture_output_to_buffer = true;
}
+ if (preproc.needs_local_invocation_index)
+ needs_local_invocation_index = true;
if (preproc.needs_subgroup_invocation_id)
needs_subgroup_invocation_id = true;
if (preproc.needs_subgroup_size)
@@ -2147,6 +2243,36 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
break;
}
+ case OpGroupNonUniformRotateKHR:
+ {
+ // Add the correct invocation ID for the clustered rotate calculation.
+ if (i.length > 5)
+ added_arg_ids.insert(static_cast<Scope>(evaluate_constant_u32(ops[2])) == ScopeSubgroup
+ ? builtin_subgroup_invocation_id_id : builtin_local_invocation_index_id);
+ break;
+ }
+
+ case OpGroupNonUniformFAdd:
+ case OpGroupNonUniformFMul:
+ case OpGroupNonUniformFMin:
+ case OpGroupNonUniformFMax:
+ case OpGroupNonUniformIAdd:
+ case OpGroupNonUniformIMul:
+ case OpGroupNonUniformSMin:
+ case OpGroupNonUniformSMax:
+ case OpGroupNonUniformUMin:
+ case OpGroupNonUniformUMax:
+ case OpGroupNonUniformBitwiseAnd:
+ case OpGroupNonUniformBitwiseOr:
+ case OpGroupNonUniformBitwiseXor:
+ case OpGroupNonUniformLogicalAnd:
+ case OpGroupNonUniformLogicalOr:
+ case OpGroupNonUniformLogicalXor:
+ if ((get_execution_model() != ExecutionModelFragment || msl_options.supports_msl_version(2, 2)) &&
+ ops[3] == GroupOperationClusteredReduce)
+ added_arg_ids.insert(builtin_subgroup_invocation_id_id);
+ break;
+
case OpDemoteToHelperInvocation:
if (needs_manual_helper_invocation_updates() && needs_helper_invocation)
added_arg_ids.insert(builtin_helper_invocation_id);
@@ -2317,7 +2443,14 @@ void CompilerMSL::extract_global_variables_from_function(uint32_t func_id, std::
v.storage = StorageClassWorkgroup;
// Ensure the existing variable has a valid name and the new variable has all the same meta info
- set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
+ if (ir.meta[arg_id].decoration.builtin)
+ {
+ set_name(arg_id, builtin_to_glsl(bi_type, var.storage));
+ }
+ else
+ {
+ set_name(arg_id, ensure_valid_name(to_name(arg_id), "v"));
+ }
ir.meta[next_id] = ir.meta[arg_id];
}
else if (is_builtin && has_decoration(p_type->self, DecorationBlock))
@@ -3182,41 +3315,62 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
string mbr_name = ensure_valid_name(append_member_name(mbr_name_qual, var_type, mbr_idx) + (mbr_is_indexable ? join("_", i) : ""), "m");
set_member_name(ib_type.self, ib_mbr_idx, mbr_name);
+ // The SPIR-V location of the interface variable, used to obtain the initial
+ // MSL location (the location variable) and for interface matching.
+ uint32_t ir_location = UINT32_MAX;
+ bool has_member_loc_decor = has_member_decoration(var_type.self, mbr_idx, DecorationLocation);
+ bool has_var_loc_decor = has_decoration(var.self, DecorationLocation);
+ uint32_t orig_vecsize = UINT32_MAX;
+
+ // If we haven't established a location base yet, do so here.
+ if (location == UINT32_MAX)
+ {
+ if (has_member_loc_decor)
+ ir_location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
+ else if (has_var_loc_decor)
+ ir_location = get_accumulated_member_location(var, mbr_idx, meta.strip_array);
+ else if (is_builtin)
+ {
+ if (is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
+ ir_location = inputs_by_builtin[builtin].location;
+ else if (capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
+ ir_location = outputs_by_builtin[builtin].location;
+ }
+ }
+
// Once we determine the location of the first member within nested structures,
// from a var of the topmost structure, the remaining flattened members of
// the nested structures will have consecutive location values. At this point,
// we've recursively tunnelled into structs, arrays, and matrices, and are
// down to a single location for each member now.
- if (!is_builtin && location != UINT32_MAX)
+ if (location == UINT32_MAX && ir_location != UINT32_MAX)
+ location = ir_location + i;
+
+ if (storage == StorageClassInput && (has_member_loc_decor || has_var_loc_decor))
{
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, *usable_type, storage);
- location++;
+ uint32_t component = 0;
+ uint32_t orig_mbr_type_id = usable_type->self;
+
+ if (has_member_loc_decor)
+ component = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
+
+ var.basetype = ensure_correct_input_type(var.basetype, location, component, 0, meta.strip_array);
+ mbr_type_id = ensure_correct_input_type(usable_type->self, location, component, 0, meta.strip_array);
+
+ // For members of the composite interface block, we only change the interface block type
+ // when interface matching happens. In the meantime, we store the original vector size
+ // and insert a swizzle when loading from the Metal interface block (see fixup below).
+ if (mbr_type_id != orig_mbr_type_id)
+ orig_vecsize = get<SPIRType>(orig_mbr_type_id).vecsize;
+
+ if (storage == StorageClassInput && pull_model_inputs.count(var.self))
+ ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
+ else
+ ib_type.member_types[ib_mbr_idx] = mbr_type_id;
}
- else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
+
+ if ((!is_builtin && location != UINT32_MAX) || (is_builtin && ir_location != UINT32_MAX))
{
- location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation) + i;
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, *usable_type, storage);
- location++;
- }
- else if (has_decoration(var.self, DecorationLocation))
- {
- location = get_accumulated_member_location(var, mbr_idx, meta.strip_array) + i;
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, *usable_type, storage);
- location++;
- }
- else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
- {
- location = inputs_by_builtin[builtin].location + i;
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, *usable_type, storage);
- location++;
- }
- else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
- {
- location = outputs_by_builtin[builtin].location + i;
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
mark_location_as_used_by_shader(location, *usable_type, storage);
location++;
@@ -3256,6 +3410,7 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
case StorageClassInput:
entry_func.fixup_hooks_in.push_back([=, &var]() {
string lerp_call;
+ string swizzle;
if (pull_model_inputs.count(var.self))
{
if (is_centroid)
@@ -3265,7 +3420,9 @@ void CompilerMSL::add_composite_member_variable_to_interface_block(StorageClass
else
lerp_call = ".interpolate_at_center()";
}
- statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, ";");
+ if (orig_vecsize != UINT32_MAX)
+ swizzle = vector_swizzle(orig_vecsize, 0);
+ statement(var_chain, " = ", ib_var_ref, ".", mbr_name, lerp_call, swizzle, ";");
});
break;
@@ -3333,6 +3490,55 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
qual_var_name += ".interpolate_at_center()";
}
+ // The SPIR-V location of the interface variable, used to obtain the initial
+ // MSL location (the location variable) and for interface matching.
+ uint32_t ir_location = UINT32_MAX;
+ bool has_member_loc_decor = has_member_decoration(var_type.self, mbr_idx, DecorationLocation);
+ bool has_var_loc_decor = has_decoration(var.self, DecorationLocation);
+ uint32_t orig_vecsize = UINT32_MAX;
+
+ if (has_member_loc_decor)
+ ir_location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
+ else if (has_var_loc_decor)
+ ir_location = get_accumulated_member_location(var, mbr_idx, meta.strip_array);
+ else if (is_builtin)
+ {
+ if (is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
+ ir_location = inputs_by_builtin[builtin].location;
+ else if (capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
+ ir_location = outputs_by_builtin[builtin].location;
+ }
+
+ // Once we determine the location of the first member within nested structures,
+ // from a var of the topmost structure, the remaining flattened members of
+ // the nested structures will have consecutive location values. At this point,
+ // we've recursively tunnelled into structs, arrays, and matrices, and are
+ // down to a single location for each member now.
+ if (location == UINT32_MAX && ir_location != UINT32_MAX)
+ location = ir_location;
+
+ if (storage == StorageClassInput && (has_member_loc_decor || has_var_loc_decor))
+ {
+ uint32_t component = 0;
+ uint32_t orig_mbr_type_id = mbr_type_id;
+
+ if (has_member_loc_decor)
+ component = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
+
+ mbr_type_id = ensure_correct_input_type(mbr_type_id, location, component, 0, meta.strip_array);
+
+ // For members of the composite interface block, we only change the interface block type
+ // when interface matching happens. In the meantime, we store the original vector size
+ // and insert a swizzle when loading from the Metal interface block (see fixup below).
+ if (mbr_type_id != orig_mbr_type_id)
+ orig_vecsize = get<SPIRType>(orig_mbr_type_id).vecsize;
+
+ if (storage == StorageClassInput && pull_model_inputs.count(var.self))
+ ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
+ else
+ ib_type.member_types[ib_mbr_idx] = mbr_type_id;
+ }
+
bool flatten_stage_out = false;
string var_chain = var_chain_qual + "." + to_member_name(var_type, mbr_idx);
if (is_builtin && !meta.strip_array)
@@ -3348,7 +3554,11 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
{
case StorageClassInput:
entry_func.fixup_hooks_in.push_back([=]() {
- statement(var_chain, " = ", qual_var_name, ";");
+ string swizzle;
+ // Insert swizzle for widened interface block vector from interface matching
+ if (orig_vecsize != UINT32_MAX)
+ swizzle = vector_swizzle(orig_vecsize, 0);
+ statement(var_chain, " = ", qual_var_name, swizzle, ";");
});
break;
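
Why the swizzle exists: when the previous stage writes, say, a vec4 while this shader declared vec3, the interface member is widened and each load must narrow back down. A sketch of the helper's effect (helper name hypothetical; vector_swizzle(3, 0) yields ".xyz"):

    #include <string>

    // Returns the swizzle appended on load when the member was widened; an
    // untouched member (orig_vecsize left at UINT32_MAX) gets no swizzle.
    std::string narrowing_swizzle(unsigned orig_vecsize)
    {
        static const char *sw[] = { "", ".x", ".xy", ".xyz", ".xyzw" };
        return orig_vecsize <= 4 ? sw[orig_vecsize] : "";
    }
    // e.g. "foo = in.m;" becomes "foo = in.m.xyz;" for a vec3 widened to vec4.
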
@@ -3364,64 +3574,12 @@ void CompilerMSL::add_plain_member_variable_to_interface_block(StorageClass stor
}
}
- // Once we determine the location of the first member within nested structures,
- // from a var of the topmost structure, the remaining flattened members of
- // the nested structures will have consecutive location values. At this point,
- // we've recursively tunnelled into structs, arrays, and matrices, and are
- // down to a single location for each member now.
- if (!is_builtin && location != UINT32_MAX)
+ if ((!is_builtin && location != UINT32_MAX) || (is_builtin && ir_location != UINT32_MAX))
{
set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
location += type_to_location_count(get<SPIRType>(mbr_type_id));
}
- else if (has_member_decoration(var_type.self, mbr_idx, DecorationLocation))
- {
- location = get_member_decoration(var_type.self, mbr_idx, DecorationLocation);
- uint32_t comp = get_member_decoration(var_type.self, mbr_idx, DecorationComponent);
- if (storage == StorageClassInput)
- {
- mbr_type_id = ensure_correct_input_type(mbr_type_id, location, comp, 0, meta.strip_array);
- var_type.member_types[mbr_idx] = mbr_type_id;
- if (storage == StorageClassInput && pull_model_inputs.count(var.self))
- ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
- else
- ib_type.member_types[ib_mbr_idx] = mbr_type_id;
- }
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
- location += type_to_location_count(get<SPIRType>(mbr_type_id));
- }
- else if (has_decoration(var.self, DecorationLocation))
- {
- location = get_accumulated_member_location(var, mbr_idx, meta.strip_array);
- if (storage == StorageClassInput)
- {
- mbr_type_id = ensure_correct_input_type(mbr_type_id, location, 0, 0, meta.strip_array);
- var_type.member_types[mbr_idx] = mbr_type_id;
- if (storage == StorageClassInput && pull_model_inputs.count(var.self))
- ib_type.member_types[ib_mbr_idx] = build_msl_interpolant_type(mbr_type_id, is_noperspective);
- else
- ib_type.member_types[ib_mbr_idx] = mbr_type_id;
- }
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
- location += type_to_location_count(get<SPIRType>(mbr_type_id));
- }
- else if (is_builtin && is_tessellation_shader() && storage == StorageClassInput && inputs_by_builtin.count(builtin))
- {
- location = inputs_by_builtin[builtin].location;
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
- location += type_to_location_count(get<SPIRType>(mbr_type_id));
- }
- else if (is_builtin && capture_output_to_buffer && storage == StorageClassOutput && outputs_by_builtin.count(builtin))
- {
- location = outputs_by_builtin[builtin].location;
- set_member_decoration(ib_type.self, ib_mbr_idx, DecorationLocation, location);
- mark_location_as_used_by_shader(location, get<SPIRType>(mbr_type_id), storage);
- location += type_to_location_count(get<SPIRType>(mbr_type_id));
- }
// Copy the component location, if present.
if (has_member_decoration(var_type.self, mbr_idx, DecorationComponent))
@@ -3720,6 +3878,20 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
return;
}
+ // Tessellation stages pass I/O via buffer content, which may contain nested structs.
+ // Ensure the vector sizes of any nested struct members within these input variables match
+ // the vector sizes of the corresponding output variables from the previous pipeline stage.
+ // This adjustment is handled here instead of ensure_correct_input_type() in order to
+ // perform the necessary recursive processing.
+ if (storage == StorageClassInput && var_type.basetype == SPIRType::Struct &&
+ ((is_tesc_shader() && msl_options.multi_patch_workgroup) ||
+ (is_tese_shader() && msl_options.raw_buffer_tese_input)) &&
+ has_decoration(var.self, DecorationLocation))
+ {
+ uint32_t locn = get_decoration(var.self, DecorationLocation);
+ ensure_struct_members_valid_vecsizes(get_variable_data_type(var), locn);
+ }
+
if (storage == StorageClassInput && has_decoration(var.self, DecorationPerVertexKHR))
SPIRV_CROSS_THROW("PerVertexKHR decoration is not supported in MSL.");
@@ -3915,6 +4087,43 @@ void CompilerMSL::add_variable_to_interface_block(StorageClass storage, const st
}
}
+// Recursively iterate into the input struct type, and adjust the vecsize
+// of any nested members, based on location info provided through the API.
+// The location parameter is modified recursively.
+void CompilerMSL::ensure_struct_members_valid_vecsizes(SPIRType &struct_type, uint32_t &location)
+{
+ assert(struct_type.basetype == SPIRType::Struct);
+
+ auto mbr_cnt = struct_type.member_types.size();
+ for (size_t mbr_idx = 0; mbr_idx < mbr_cnt; mbr_idx++)
+ {
+ auto mbr_type_id = struct_type.member_types[mbr_idx];
+ auto &mbr_type = get<SPIRType>(mbr_type_id);
+
+ if (mbr_type.basetype == SPIRType::Struct)
+ ensure_struct_members_valid_vecsizes(mbr_type, location);
+ else
+ {
+ auto p_va = inputs_by_location.find({location, 0});
+ if (p_va != end(inputs_by_location) && p_va->second.vecsize > mbr_type.vecsize)
+ {
+ // Set a new member type into the struct type, and all its parent types.
+ auto new_mbr_type_id = build_extended_vector_type(mbr_type_id, p_va->second.vecsize);
+ for (auto *p_type = &struct_type; p_type; p_type = maybe_get<SPIRType>(p_type->parent_type))
+ p_type->member_types[mbr_idx] = new_mbr_type_id;
+ }
+
+ // Calc location of next member
+ uint32_t loc_cnt = mbr_type.columns;
+ auto dim_cnt = mbr_type.array.size();
+ for (uint32_t i = 0; i < dim_cnt; i++)
+ loc_cnt *= to_array_size_literal(mbr_type, i);
+
+ location += loc_cnt;
+ }
+ }
+}
+
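
The location step at the end of the member loop deserves a worked example: a member's footprint is its column count times the product of its array dimensions. A standalone check of the same arithmetic:

    #include <cassert>
    #include <cstdint>

    uint32_t location_count(uint32_t columns, const uint32_t *dims, uint32_t dim_count)
    {
        uint32_t count = columns;
        for (uint32_t i = 0; i < dim_count; i++)
            count *= dims[i];
        return count;
    }

    int main()
    {
        uint32_t dims[] = { 2 };
        assert(location_count(4, dims, 1) == 8);    // mat4 x[2] -> 8 locations
        assert(location_count(1, nullptr, 0) == 1); // vec3 -> 1 location
        return 0;
    }
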
// Fix up the mapping of variables to interface member indices, which is used to compile access chains
// for per-vertex variables in a tessellation control shader.
void CompilerMSL::fix_up_interface_member_indices(StorageClass storage, uint32_t ib_type_id)
@@ -4219,8 +4428,9 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
// If the entry point should return the output struct, set the entry function
// to return the output interface struct, otherwise to return nothing.
// Watch out for the rare case where the terminator of the last entry point block is a
- // Kill, instead of a Return. Based on SPIR-V's block-domination rules, we assume that
- // any block that has a Kill will also have a terminating Return, except the last block.
+ // Kill or Unreachable, instead of a Return. Based on SPIR-V's block-domination rules,
+ // we assume that any block that has a Kill or Unreachable will also have a terminating
+ // Return, except the last block.
// Indicate the output var requires early initialization.
bool ep_should_return_output = !get_is_rasterization_disabled();
uint32_t rtn_id = ep_should_return_output ? ib_var_id : 0;
@@ -4230,7 +4440,8 @@ uint32_t CompilerMSL::add_interface_block(StorageClass storage, bool patch)
for (auto &blk_id : entry_func.blocks)
{
auto &blk = get<SPIRBlock>(blk_id);
- if (blk.terminator == SPIRBlock::Return || (blk.terminator == SPIRBlock::Kill && blk_id == entry_func.blocks.back()))
+ auto last_blk_return = blk.terminator == SPIRBlock::Kill || blk.terminator == SPIRBlock::Unreachable;
+ if (blk.terminator == SPIRBlock::Return || (last_blk_return && blk_id == entry_func.blocks.back()))
blk.return_value = rtn_id;
}
vars_needing_early_declaration.push_back(ib_var_id);
@@ -5539,18 +5750,44 @@ void CompilerMSL::emit_header()
{
// This particular line can be overridden during compilation, so make it a flag and not a pragma line.
if (suppress_missing_prototypes)
- statement("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"");
+ add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-prototypes\"", false);
if (suppress_incompatible_pointer_types_discard_qualifiers)
- statement("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"");
+ add_pragma_line("#pragma clang diagnostic ignored \"-Wincompatible-pointer-types-discards-qualifiers\"", false);
+
+ // Disable warning about "sometimes unitialized" when zero-initializing simple threadgroup variables
+ if (suppress_sometimes_unitialized)
+ add_pragma_line("#pragma clang diagnostic ignored \"-Wsometimes-uninitialized\"", false);
// Disable warning about missing braces for array template to make arrays a value type
if (spv_function_implementations.count(SPVFuncImplUnsafeArray) != 0)
- statement("#pragma clang diagnostic ignored \"-Wmissing-braces\"");
+ add_pragma_line("#pragma clang diagnostic ignored \"-Wmissing-braces\"", false);
+
+ // Floating point fast math compile declarations
+ if (msl_options.use_fast_math_pragmas && msl_options.supports_msl_version(3, 2))
+ {
+ uint32_t contract_mask = FPFastMathModeAllowContractMask;
+ uint32_t relax_mask = (FPFastMathModeNSZMask | FPFastMathModeAllowRecipMask | FPFastMathModeAllowReassocMask);
+ uint32_t fast_mask = (relax_mask | FPFastMathModeNotNaNMask | FPFastMathModeNotInfMask);
+
+ // FP math mode
+ uint32_t fp_flags = get_fp_fast_math_flags(true);
+ const char *math_mode = "safe";
+ if ((fp_flags & fast_mask) == fast_mask) // Must have all flags
+ math_mode = "fast";
+ else if ((fp_flags & relax_mask) == relax_mask) // Must have all flags
+ math_mode = "relaxed";
+
+ add_pragma_line(join("#pragma metal fp math_mode(", math_mode, ")"), false);
+
+ // FP contraction
+ const char *contract_mode = ((fp_flags & contract_mask) == contract_mask) ? "fast" : "off";
+ add_pragma_line(join("#pragma metal fp contract(", contract_mode, ")"), false);
+ }
for (auto &pragma : pragma_lines)
statement(pragma);
- if (!pragma_lines.empty() || suppress_missing_prototypes)
+ if (!pragma_lines.empty())
statement("");
statement("#include ");
@@ -5570,18 +5807,23 @@ void CompilerMSL::emit_header()
statement("");
}
-void CompilerMSL::add_pragma_line(const string &line)
+void CompilerMSL::add_pragma_line(const string &line, bool recompile_on_unique)
{
- auto rslt = pragma_lines.insert(line);
- if (rslt.second)
- force_recompile();
+ if (std::find(pragma_lines.begin(), pragma_lines.end(), line) == pragma_lines.end())
+ {
+ pragma_lines.push_back(line);
+ if (recompile_on_unique)
+ force_recompile();
+ }
}
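
Switching pragma_lines from a set to an ordered container matters because pragma order is observable in the generated source; a standalone sketch of the new bookkeeping (container choice assumed from the std::find usage above):

    #include <algorithm>
    #include <string>
    #include <vector>

    struct PragmaLines {
        std::vector<std::string> lines; // insertion order preserved on output
        bool recompile_requested = false;

        void add(const std::string &line, bool recompile_on_unique)
        {
            if (std::find(lines.begin(), lines.end(), line) == lines.end())
            {
                lines.push_back(line);
                if (recompile_on_unique)
                    recompile_requested = true; // stands in for force_recompile()
            }
        }
    };
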
void CompilerMSL::add_typedef_line(const string &line)
{
- auto rslt = typedef_lines.insert(line);
- if (rslt.second)
+ if (std::find(typedef_lines.begin(), typedef_lines.end(), line) == typedef_lines.end())
+ {
+ typedef_lines.push_back(line);
force_recompile();
+ }
}
// Template struct like spvUnsafeArray<> need to be declared *before* any resources are declared
@@ -5789,7 +6031,6 @@ void CompilerMSL::emit_custom_functions()
if (!msl_options.supports_msl_version(2))
SPIRV_CROSS_THROW(
"spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0.");
- spv_function_implementations.insert(SPVFuncImplForwardArgs);
spv_function_implementations.insert(SPVFuncImplTextureSwizzle);
if (msl_options.swizzle_texture_samples)
spv_function_implementations.insert(SPVFuncImplGatherSwizzle);
@@ -5803,16 +6044,22 @@ void CompilerMSL::emit_custom_functions()
spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020);
}
- for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane;
- i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++)
- if (spv_function_implementations.count(static_cast<SPVFuncImpl>(i)))
- spv_function_implementations.insert(SPVFuncImplForwardArgs);
+ if (spv_function_implementations.count(SPVFuncImplGatherSwizzle) ||
+ spv_function_implementations.count(SPVFuncImplGatherConstOffsets))
+ {
+ spv_function_implementations.insert(SPVFuncImplGatherReturn);
+ }
+
+ if (spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle) ||
+ spv_function_implementations.count(SPVFuncImplGatherCompareConstOffsets))
+ {
+ spv_function_implementations.insert(SPVFuncImplGatherCompareReturn);
+ }
if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) ||
spv_function_implementations.count(SPVFuncImplGatherSwizzle) ||
spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle))
{
- spv_function_implementations.insert(SPVFuncImplForwardArgs);
spv_function_implementations.insert(SPVFuncImplGetSwizzle);
}
@@ -5820,6 +6067,17 @@ void CompilerMSL::emit_custom_functions()
{
switch (spv_func)
{
+ case SPVFuncImplSMod:
+ statement("// Implementation of signed integer mod accurate to SPIR-V specification");
+ statement("template");
+ statement("inline Tx spvSMod(Tx x, Ty y)");
+ begin_scope();
+ statement("Tx remainder = x - y * (x / y);");
+ statement("return select(Tx(remainder + y), remainder, remainder == 0 || (x >= 0) == (y >= 0));");
+ end_scope();
+ statement("");
+ break;
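
The select() line implements SPIR-V's OpSMod, whose result takes the sign of the divisor, unlike C-family % which follows the dividend. A host-side reference of the same rule:

    #include <cassert>

    // Mirrors the emitted spvSMod: remainder of x / y, folded toward the sign of y.
    int spv_smod(int x, int y)
    {
        int r = x - y * (x / y); // identical to x % y with truncating division
        return (r == 0 || (x >= 0) == (y >= 0)) ? r : r + y;
    }

    int main()
    {
        assert(spv_smod(-7, 3) == 2);  // -7 % 3 == -1 in C++, but OpSMod yields 2
        assert(spv_smod(7, -3) == -2); // sign follows the divisor
        assert(spv_smod(6, 3) == 0);
        return 0;
    }
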
+
case SPVFuncImplMod:
statement("// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()");
statement("template");
@@ -6282,23 +6540,6 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
- case SPVFuncImplForwardArgs:
- statement("template struct spvRemoveReference { typedef T type; };");
- statement("template struct spvRemoveReference { typedef T type; };");
- statement("template struct spvRemoveReference { typedef T type; };");
- statement("template inline constexpr thread T&& spvForward(thread typename "
- "spvRemoveReference::type& x)");
- begin_scope();
- statement("return static_cast(x);");
- end_scope();
- statement("template inline constexpr thread T&& spvForward(thread typename "
- "spvRemoveReference::type&& x)");
- begin_scope();
- statement("return static_cast(x);");
- end_scope();
- statement("");
- break;
-
case SPVFuncImplGetSwizzle:
statement("enum class spvSwizzle : uint");
begin_scope();
@@ -6356,11 +6597,22 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
+ case SPVFuncImplGatherReturn:
+ statement("template");
+ statement("using spvGatherReturn = decltype(declval().gather(declval(), declval()...));");
+ statement("");
+ break;
+
+ case SPVFuncImplGatherCompareReturn:
+ statement("template");
+ statement("using spvGatherCompareReturn = decltype(declval().gather_compare(declval(), declval()...));");
+ statement("");
+ break;
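
The replacement for spvForward and hard-coded vec<T, 4> is a decltype alias; the same pattern in standard C++ (names here hypothetical, shown only to make the deduction visible):

    #include <type_traits>
    #include <utility>

    // Deduce a member call's result type instead of spelling it out.
    template <typename Tex, typename... Ts>
    using gather_return_t = decltype(std::declval<Tex>().gather(std::declval<Ts>()...));

    struct FakeTex {
        float gather(int sampler_like, float coord) const; // illustrative signature
    };
    static_assert(std::is_same_v<gather_return_t<FakeTex, int, float>, float>,
                  "alias resolves to the member call's return type");
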
+
case SPVFuncImplGatherSwizzle:
statement("// Wrapper function that swizzles texture gathers.");
- statement("template class Tex, "
- "typename... Ts>");
- statement("inline vec spvGatherSwizzle(const thread Tex& t, sampler s, "
+ statement("template");
+ statement("inline spvGatherReturn spvGatherSwizzle(const thread Tex& t, sampler s, "
"uint sw, component c, Ts... params) METAL_CONST_ARG(c)");
begin_scope();
statement("if (sw)");
@@ -6370,17 +6622,17 @@ void CompilerMSL::emit_custom_functions()
statement("case spvSwizzle::none:");
statement(" break;");
statement("case spvSwizzle::zero:");
- statement(" return vec(0, 0, 0, 0);");
+ statement(" return spvGatherReturn(0, 0, 0, 0);");
statement("case spvSwizzle::one:");
- statement(" return vec(1, 1, 1, 1);");
+ statement(" return spvGatherReturn(1, 1, 1, 1);");
statement("case spvSwizzle::red:");
- statement(" return t.gather(s, spvForward(params)..., component::x);");
+ statement(" return t.gather(s, params..., component::x);");
statement("case spvSwizzle::green:");
- statement(" return t.gather(s, spvForward(params)..., component::y);");
+ statement(" return t.gather(s, params..., component::y);");
statement("case spvSwizzle::blue:");
- statement(" return t.gather(s, spvForward(params)..., component::z);");
+ statement(" return t.gather(s, params..., component::z);");
statement("case spvSwizzle::alpha:");
- statement(" return t.gather(s, spvForward(params)..., component::w);");
+ statement(" return t.gather(s, params..., component::w);");
end_scope();
end_scope();
// texture::gather insists on its component parameter being a constant
@@ -6388,13 +6640,13 @@ void CompilerMSL::emit_custom_functions()
statement("switch (c)");
begin_scope();
statement("case component::x:");
- statement(" return t.gather(s, spvForward(params)..., component::x);");
+ statement(" return t.gather(s, params..., component::x);");
statement("case component::y:");
- statement(" return t.gather(s, spvForward(params)..., component::y);");
+ statement(" return t.gather(s, params..., component::y);");
statement("case component::z:");
- statement(" return t.gather(s, spvForward(params)..., component::z);");
+ statement(" return t.gather(s, params..., component::z);");
statement("case component::w:");
- statement(" return t.gather(s, spvForward(params)..., component::w);");
+ statement(" return t.gather(s, params..., component::w);");
end_scope();
end_scope();
statement("");
@@ -6402,10 +6654,8 @@ void CompilerMSL::emit_custom_functions()
case SPVFuncImplGatherCompareSwizzle:
statement("// Wrapper function that swizzles depth texture gathers.");
- statement("template class Tex, "
- "typename... Ts>");
- statement("inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler "
- "s, uint sw, Ts... params) ");
+ statement("template");
+ statement("inline spvGatherCompareReturn spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params)");
begin_scope();
statement("if (sw)");
begin_scope();
@@ -6418,12 +6668,12 @@ void CompilerMSL::emit_custom_functions()
statement("case spvSwizzle::green:");
statement("case spvSwizzle::blue:");
statement("case spvSwizzle::alpha:");
- statement(" return vec(0, 0, 0, 0);");
+ statement(" return spvGatherCompareReturn(0, 0, 0, 0);");
statement("case spvSwizzle::one:");
- statement(" return vec(1, 1, 1, 1);");
+ statement(" return spvGatherCompareReturn(1, 1, 1, 1);");
end_scope();
end_scope();
- statement("return t.gather_compare(s, spvForward(params)...);");
+ statement("return t.gather_compare(s, params...);");
end_scope();
statement("");
break;
@@ -6433,33 +6683,32 @@ void CompilerMSL::emit_custom_functions()
for (uint32_t i = 0; i < texture_addr_space_count; i++)
{
statement("// Wrapper function that processes a ", texture_addr_spaces[i], " texture gather with a constant offset array.");
- statement("template class Tex, "
- "typename Toff, typename... Tp>");
- statement("inline vec spvGatherConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, "
+ statement("template");
+ statement("inline spvGatherReturn spvGatherConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, "
"Toff coffsets, component c, Tp... params) METAL_CONST_ARG(c)");
begin_scope();
- statement("vec rslts[4];");
+ statement("spvGatherReturn rslts[4];");
statement("for (uint i = 0; i < 4; i++)");
begin_scope();
statement("switch (c)");
begin_scope();
// Work around texture::gather() requiring its component parameter to be a constant expression
statement("case component::x:");
- statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::x);");
+ statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::x);");
statement(" break;");
statement("case component::y:");
- statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::y);");
+ statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::y);");
statement(" break;");
statement("case component::z:");
- statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::z);");
+ statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::z);");
statement(" break;");
statement("case component::w:");
- statement(" rslts[i] = t.gather(s, spvForward(params)..., coffsets[i], component::w);");
+ statement(" rslts[i] = t.gather(s, params..., coffsets[i], component::w);");
statement(" break;");
end_scope();
end_scope();
// Pull all values from the i0j0 component of each gather footprint
- statement("return vec(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
+ statement("return spvGatherReturn(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
end_scope();
statement("");
}
@@ -6470,18 +6719,17 @@ void CompilerMSL::emit_custom_functions()
for (uint32_t i = 0; i < texture_addr_space_count; i++)
{
statement("// Wrapper function that processes a ", texture_addr_spaces[i], " texture gather with a constant offset array.");
- statement("template class Tex, "
- "typename Toff, typename... Tp>");
- statement("inline vec spvGatherCompareConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, "
+ statement("template");
+ statement("inline spvGatherCompareReturn spvGatherCompareConstOffsets(const ", texture_addr_spaces[i], " Tex& t, sampler s, "
"Toff coffsets, Tp... params)");
begin_scope();
- statement("vec rslts[4];");
+ statement("spvGatherCompareReturn rslts[4];");
statement("for (uint i = 0; i < 4; i++)");
begin_scope();
- statement(" rslts[i] = t.gather_compare(s, spvForward(params)..., coffsets[i]);");
+ statement(" rslts[i] = t.gather_compare(s, params..., coffsets[i]);");
end_scope();
// Pull all values from the i0j0 component of each gather footprint
- statement("return vec(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
+ statement("return spvGatherCompareReturn(rslts[0].w, rslts[1].w, rslts[2].w, rslts[3].w);");
end_scope();
statement("");
}
@@ -6826,6 +7074,135 @@ void CompilerMSL::emit_custom_functions()
statement("");
break;
+ case SPVFuncImplSubgroupRotate:
+ statement("template");
+ statement("inline T spvSubgroupRotate(T value, ushort delta)");
+ begin_scope();
+ if (msl_options.use_quadgroup_operation())
+ statement("return quad_shuffle_rotate_down(value, delta);");
+ else
+ statement("return simd_shuffle_rotate_down(value, delta);");
+ end_scope();
+ statement("");
+ statement("template<>");
+ statement("inline bool spvSubgroupRotate(bool value, ushort delta)");
+ begin_scope();
+ if (msl_options.use_quadgroup_operation())
+ statement("return !!quad_shuffle_rotate_down((ushort)value, delta);");
+ else
+ statement("return !!simd_shuffle_rotate_down((ushort)value, delta);");
+ end_scope();
+ statement("");
+ statement("template");
+ statement("inline vec spvSubgroupRotate(vec value, ushort delta)");
+ begin_scope();
+ if (msl_options.use_quadgroup_operation())
+ statement("return (vec)quad_shuffle_rotate_down((vec)value, delta);");
+ else
+ statement("return (vec)simd_shuffle_rotate_down((vec)value, delta);");
+ end_scope();
+ statement("");
+ break;
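
For reference, the shuffle-rotate mapping relies on OpGroupNonUniformRotateKHR's definition: invocation i receives the value held by invocation (i + delta) mod SubgroupSize. The index arithmetic, stated as code:

    #include <cassert>
    #include <cstdint>

    uint32_t rotate_source_lane(uint32_t lane, uint32_t delta, uint32_t subgroup_size)
    {
        return (lane + delta) % subgroup_size;
    }

    int main()
    {
        assert(rotate_source_lane(30, 5, 32) == 3); // wraps around the subgroup
        return 0;
    }
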
+
+ // C++ disallows partial specializations of function templates,
+ // hence the use of a struct.
+ // clang-format off
+#define FUNC_SUBGROUP_CLUSTERED(spv, msl, combine, op, ident) \
+ case SPVFuncImplSubgroupClustered##spv: \
+ statement("template"); \
+ statement("struct spvClustered" #spv "Detail;"); \
+ statement(""); \
+ statement("// Base cases"); \
+ statement("template<>"); \
+ statement("struct spvClustered" #spv "Detail<1, 0>"); \
+ begin_scope(); \
+ statement("template"); \
+ statement("static T op(T value, uint)"); \
+ begin_scope(); \
+ statement("return value;"); \
+ end_scope(); \
+ end_scope_decl(); \
+ statement(""); \
+ statement("template"); \
+ statement("struct spvClustered" #spv "Detail<1, offset>"); \
+ begin_scope(); \
+ statement("template"); \
+ statement("static T op(T value, uint lid)"); \
+ begin_scope(); \
+ statement("// If the target lane is inactive, then return identity."); \
+ if (msl_options.use_quadgroup_operation()) \
+ statement("if (!extract_bits((quad_vote::vote_t)quad_active_threads_mask(), (lid ^ offset), 1))"); \
+ else \
+ statement("if (!extract_bits(as_type((simd_vote::vote_t)simd_active_threads_mask())[(lid ^ offset) / 32], (lid ^ offset) % 32, 1))"); \
+ statement(" return " #ident ";"); \
+ if (msl_options.use_quadgroup_operation()) \
+ statement("return quad_shuffle_xor(value, offset);"); \
+ else \
+ statement("return simd_shuffle_xor(value, offset);"); \
+ end_scope(); \
+ end_scope_decl(); \
+ statement(""); \
+ statement("template<>"); \
+ statement("struct spvClustered" #spv "Detail<4, 0>"); \
+ begin_scope(); \
+ statement("template"); \
+ statement("static T op(T value, uint)"); \
+ begin_scope(); \
+ statement("return quad_" #msl "(value);"); \
+ end_scope(); \
+ end_scope_decl(); \
+ statement(""); \
+ statement("template"); \
+ statement("struct spvClustered" #spv "Detail<4, offset>"); \
+ begin_scope(); \
+ statement("template