diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index a1a6a3e4cb..82d05a2ac5 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -183,6 +183,18 @@ Comment: AccessKit Copyright: 2023, The AccessKit Authors. License: Expat +Files: servers/rendering/renderer_rd/shaders/smaa_blending.glsl + servers/rendering/renderer_rd/shaders/smaa_weight_calculation.glsl + servers/rendering/renderer_rd/shaders/smaa_edge_detection.glsl + thirdparty/smaa/* +Comment: Subpixel Morphological Antialiasing +Copyright: 2013 Jorge Jimenez (jorge@iryoku.com) + 2013 Jose I. Echevarria (joseignacioechevarria@gmail.com) + 2013 Belen Masia (bmasia@unizar.es) + 2013 Fernando Navarro (fernandn@microsoft.com) + 2013 Diego Gutierrez (diegog@unizar.es) +License: Expat + Files: thirdparty/amd-fsr/* Comment: AMD FidelityFX Super Resolution Copyright: 2021, Advanced Micro Devices, Inc. diff --git a/core/config/engine.cpp b/core/config/engine.cpp index 8e5d58638e..f74925fd72 100644 --- a/core/config/engine.cpp +++ b/core/config/engine.cpp @@ -277,40 +277,23 @@ String Engine::get_license_text() const { String Engine::get_architecture_name() const { #if defined(__x86_64) || defined(__x86_64__) || defined(__amd64__) || defined(_M_X64) return "x86_64"; - #elif defined(__i386) || defined(__i386__) || defined(_M_IX86) return "x86_32"; - #elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) return "arm64"; - #elif defined(__arm__) || defined(_M_ARM) return "arm32"; - #elif defined(__riscv) -#if __riscv_xlen == 8 return "rv64"; -#else - return "riscv"; -#endif - -#elif defined(__powerpc__) -#if defined(__powerpc64__) +#elif defined(__powerpc64__) return "ppc64"; -#else - return "ppc"; -#endif - #elif defined(__loongarch64) return "loongarch64"; - -#elif defined(__wasm__) -#if defined(__wasm64__) +#elif defined(__wasm64__) return "wasm64"; #elif defined(__wasm32__) return "wasm32"; #endif -#endif } bool Engine::is_abort_on_gpu_errors_enabled() const { diff --git a/core/io/dir_access.cpp b/core/io/dir_access.cpp index 55b1dedc53..b2cbfa13f1 100644 --- a/core/io/dir_access.cpp +++ b/core/io/dir_access.cpp @@ -674,6 +674,8 @@ void DirAccess::_bind_methods() { ClassDB::bind_method(D_METHOD("set_include_hidden", "enable"), &DirAccess::set_include_hidden); ClassDB::bind_method(D_METHOD("get_include_hidden"), &DirAccess::get_include_hidden); + ClassDB::bind_method(D_METHOD("get_filesystem_type"), &DirAccess::get_filesystem_type); + ClassDB::bind_method(D_METHOD("is_case_sensitive", "path"), &DirAccess::is_case_sensitive); ClassDB::bind_method(D_METHOD("is_equivalent", "path_a", "path_b"), &DirAccess::is_equivalent); diff --git a/core/io/image.cpp b/core/io/image.cpp index 03e28feba0..f117cf14e7 100644 --- a/core/io/image.cpp +++ b/core/io/image.cpp @@ -4279,7 +4279,7 @@ Image::Image(const uint8_t *p_mem_png_jpg, int p_len) { } } -Ref Image::duplicate(bool p_subresources) const { +Ref Image::_duplicate(const DuplicateParams &p_params) const { Ref copy; copy.instantiate(); copy->_copy_internals_from(*this); diff --git a/core/io/image.h b/core/io/image.h index adad55c666..7dc310a712 100644 --- a/core/io/image.h +++ b/core/io/image.h @@ -247,6 +247,8 @@ public: static Ref (*basis_universal_unpacker_ptr)(const uint8_t *p_data, int p_size); protected: + virtual Ref _duplicate(const DuplicateParams &p_params) const override; + static void _bind_methods(); private: @@ -429,8 +431,6 @@ public: void convert_ra_rgba8_to_rg(); void convert_rgba8_to_bgra8(); - virtual Ref duplicate(bool p_subresources = false) const override; - UsedChannels 
detect_used_channels(CompressSource p_source = COMPRESS_SOURCE_GENERIC) const; void optimize_channels(); diff --git a/core/io/pck_packer.cpp b/core/io/pck_packer.cpp index 9edc9acbf1..35ee87eb36 100644 --- a/core/io/pck_packer.cpp +++ b/core/io/pck_packer.cpp @@ -120,7 +120,7 @@ Error PCKPacker::add_file_removal(const String &p_target_path) { pf.size = 0; pf.removal = true; - pf.md5.resize_zeroed(16); + pf.md5.resize_initialized(16); files.push_back(pf); diff --git a/core/io/resource.cpp b/core/io/resource.cpp index 490ae3bbba..39969e554b 100644 --- a/core/io/resource.cpp +++ b/core/io/resource.cpp @@ -36,6 +36,7 @@ #include "core/math/math_funcs.h" #include "core/math/random_pcg.h" #include "core/os/os.h" +#include "core/variant/container_type_validate.h" #include "scene/main/node.h" //only so casting works void Resource::emit_changed() { @@ -267,76 +268,178 @@ void Resource::reload_from_file() { copy_from(s); } -void Resource::_dupe_sub_resources(Variant &r_variant, Node *p_for_scene, HashMap, Ref> &p_remap_cache) { - switch (r_variant.get_type()) { - case Variant::ARRAY: { - Array a = r_variant; - for (int i = 0; i < a.size(); i++) { - _dupe_sub_resources(a[i], p_for_scene, p_remap_cache); - } - } break; - case Variant::DICTIONARY: { - Dictionary d = r_variant; - for (Variant &k : d.get_key_list()) { - if (k.get_type() == Variant::OBJECT) { - // Replace in dictionary key. - Ref sr = k; - if (sr.is_valid() && sr->is_local_to_scene()) { - if (p_remap_cache.has(sr)) { - d[p_remap_cache[sr]] = d[k]; - d.erase(k); - } else { - Ref dupe = sr->duplicate_for_local_scene(p_for_scene, p_remap_cache); - d[dupe] = d[k]; - d.erase(k); - p_remap_cache[sr] = dupe; +Variant Resource::_duplicate_recursive(const Variant &p_variant, const DuplicateParams &p_params, uint32_t p_usage) const { + // Anything other than object can be simply skipped in case of a shallow copy. + if (!p_params.deep && p_variant.get_type() != Variant::OBJECT) { + return p_variant; + } + + switch (p_variant.get_type()) { + case Variant::OBJECT: { + const Ref &sr = p_variant; + bool should_duplicate = false; + if (sr.is_valid()) { + if ((p_usage & PROPERTY_USAGE_ALWAYS_DUPLICATE)) { + should_duplicate = true; + } else if ((p_usage & PROPERTY_USAGE_NEVER_DUPLICATE)) { + should_duplicate = false; + } else if (p_params.local_scene) { + should_duplicate = sr->is_local_to_scene(); + } else { + switch (p_params.subres_mode) { + case RESOURCE_DEEP_DUPLICATE_NONE: { + should_duplicate = false; + } break; + case RESOURCE_DEEP_DUPLICATE_INTERNAL: { + should_duplicate = p_params.deep && sr->is_built_in(); + } break; + case RESOURCE_DEEP_DUPLICATE_ALL: { + should_duplicate = p_params.deep; + } break; + default: { + DEV_ASSERT(false); } } - } else { - _dupe_sub_resources(k, p_for_scene, p_remap_cache); } - - _dupe_sub_resources(d[k], p_for_scene, p_remap_cache); + } + if (should_duplicate) { + if (thread_duplicate_remap_cache->has(sr)) { + return thread_duplicate_remap_cache->get(sr); + } else { + const Ref &dupe = p_params.local_scene + ? 
sr->duplicate_for_local_scene(p_params.local_scene, *thread_duplicate_remap_cache) + : sr->_duplicate(p_params); + thread_duplicate_remap_cache->insert(sr, dupe); + return dupe; + } + } else { + return p_variant; } } break; - case Variant::OBJECT: { - Ref sr = r_variant; - if (sr.is_valid() && sr->is_local_to_scene()) { - if (p_remap_cache.has(sr)) { - r_variant = p_remap_cache[sr]; - } else { - Ref dupe = sr->duplicate_for_local_scene(p_for_scene, p_remap_cache); - r_variant = dupe; - p_remap_cache[sr] = dupe; - } + case Variant::ARRAY: { + const Array &src = p_variant; + Array dst; + if (src.is_typed()) { + dst.set_typed(src.get_element_type()); } + dst.resize(src.size()); + for (int i = 0; i < src.size(); i++) { + dst[i] = _duplicate_recursive(src[i], p_params); + } + return dst; + } break; + case Variant::DICTIONARY: { + const Dictionary &src = p_variant; + Dictionary dst; + if (src.is_typed()) { + dst.set_typed(src.get_key_type(), src.get_value_type()); + } + for (const Variant &k : src.get_key_list()) { + const Variant &v = src[k]; + dst.set( + _duplicate_recursive(k, p_params), + _duplicate_recursive(v, p_params)); + } + return dst; + } break; + case Variant::PACKED_BYTE_ARRAY: + case Variant::PACKED_INT32_ARRAY: + case Variant::PACKED_INT64_ARRAY: + case Variant::PACKED_FLOAT32_ARRAY: + case Variant::PACKED_FLOAT64_ARRAY: + case Variant::PACKED_STRING_ARRAY: + case Variant::PACKED_VECTOR2_ARRAY: + case Variant::PACKED_VECTOR3_ARRAY: + case Variant::PACKED_COLOR_ARRAY: + case Variant::PACKED_VECTOR4_ARRAY: { + return p_variant.duplicate(); } break; default: { + return p_variant; } } } -Ref Resource::duplicate_for_local_scene(Node *p_for_scene, HashMap, Ref> &p_remap_cache) { +Ref Resource::_duplicate(const DuplicateParams &p_params) const { + ERR_FAIL_COND_V_MSG(p_params.local_scene && p_params.subres_mode != RESOURCE_DEEP_DUPLICATE_MAX, Ref(), "Duplication for local-to-scene can't specify a deep duplicate mode."); + + DuplicateRemapCacheT *remap_cache_backup = thread_duplicate_remap_cache; + +// These are for avoiding potential duplicates that can happen in custom code +// from participating in the same duplication session (remap cache). +#define BEFORE_USER_CODE thread_duplicate_remap_cache = nullptr; +#define AFTER_USER_CODE thread_duplicate_remap_cache = remap_cache_backup; + List plist; get_property_list(&plist); + BEFORE_USER_CODE Ref r = Object::cast_to(ClassDB::instantiate(get_class())); + AFTER_USER_CODE ERR_FAIL_COND_V(r.is_null(), Ref()); - r->local_scene = p_for_scene; + thread_duplicate_remap_cache->insert(Ref(this), r); + + if (p_params.local_scene) { + r->local_scene = p_params.local_scene; + } + + // Duplicate script first, so the scripted properties are considered. + BEFORE_USER_CODE + r->set_script(get_script()); + AFTER_USER_CODE for (const PropertyInfo &E : plist) { if (!(E.usage & PROPERTY_USAGE_STORAGE)) { continue; } - Variant p = get(E.name).duplicate(true); + if (E.name == "script") { + continue; + } - _dupe_sub_resources(p, p_for_scene, p_remap_cache); + BEFORE_USER_CODE + Variant p = get(E.name); + AFTER_USER_CODE + p = _duplicate_recursive(p, p_params, E.usage); + + BEFORE_USER_CODE r->set(E.name, p); + AFTER_USER_CODE } return r; + +#undef BEFORE_USER_CODE +#undef AFTER_USER_CODE +} + +Ref Resource::duplicate_for_local_scene(Node *p_for_scene, DuplicateRemapCacheT &p_remap_cache) const { +#ifdef DEBUG_ENABLED + // The only possibilities for the remap cache passed being valid are these: + // a) It's the same already used as the one of the thread. 
That happens when this function + // is called within some recursion level within a duplication. + // b) There's no current thread remap cache, which means this function is acting as an entry point. + // This check failing means that this function is being called as an entry point during an ongoing + // duplication, likely due to custom instantiation or setter code. It would be an engine bug because + // code starting or joining a duplicate session must ensure to exit it temporarily when making calls + // that may in turn invoke such custom code. + if (thread_duplicate_remap_cache && &p_remap_cache != thread_duplicate_remap_cache) { + ERR_PRINT("Resource::duplicate_for_local_scene() called during an ongoing duplication session. This is an engine bug."); + } +#endif + + DuplicateRemapCacheT *remap_cache_backup = thread_duplicate_remap_cache; + thread_duplicate_remap_cache = &p_remap_cache; + + DuplicateParams params; + params.deep = true; + params.local_scene = p_for_scene; + const Ref &dupe = _duplicate(params); + + thread_duplicate_remap_cache = remap_cache_backup; + + return dupe; } void Resource::_find_sub_resources(const Variant &p_variant, HashSet> &p_resources_found) { @@ -365,7 +468,7 @@ void Resource::_find_sub_resources(const Variant &p_variant, HashSet, Ref> &p_remap_cache) { +void Resource::configure_for_local_scene(Node *p_for_scene, DuplicateRemapCacheT &p_remap_cache) { List plist; get_property_list(&plist); @@ -392,53 +495,90 @@ void Resource::configure_for_local_scene(Node *p_for_scene, HashMap Resource::duplicate(bool p_subresources) const { - List plist; - get_property_list(&plist); +Ref Resource::duplicate(bool p_deep) const { + DuplicateRemapCacheT remap_cache; + bool started_session = false; + if (!thread_duplicate_remap_cache) { + thread_duplicate_remap_cache = &remap_cache; + started_session = true; + } - Ref r = static_cast(ClassDB::instantiate(get_class())); - ERR_FAIL_COND_V(r.is_null(), Ref()); + DuplicateParams params; + params.deep = p_deep; + params.subres_mode = RESOURCE_DEEP_DUPLICATE_INTERNAL; + const Ref &dupe = _duplicate(params); - for (const PropertyInfo &E : plist) { - if (!(E.usage & PROPERTY_USAGE_STORAGE)) { - continue; - } - Variant p = get(E.name); + if (started_session) { + thread_duplicate_remap_cache = nullptr; + } - switch (p.get_type()) { - case Variant::Type::DICTIONARY: - case Variant::Type::ARRAY: - case Variant::Type::PACKED_BYTE_ARRAY: - case Variant::Type::PACKED_COLOR_ARRAY: - case Variant::Type::PACKED_INT32_ARRAY: - case Variant::Type::PACKED_INT64_ARRAY: - case Variant::Type::PACKED_FLOAT32_ARRAY: - case Variant::Type::PACKED_FLOAT64_ARRAY: - case Variant::Type::PACKED_STRING_ARRAY: - case Variant::Type::PACKED_VECTOR2_ARRAY: - case Variant::Type::PACKED_VECTOR3_ARRAY: - case Variant::Type::PACKED_VECTOR4_ARRAY: { - r->set(E.name, p.duplicate(p_subresources)); - } break; + return dupe; +} - case Variant::Type::OBJECT: { - if (!(E.usage & PROPERTY_USAGE_NEVER_DUPLICATE) && (p_subresources || (E.usage & PROPERTY_USAGE_ALWAYS_DUPLICATE))) { - Ref sr = p; - if (sr.is_valid()) { - r->set(E.name, sr->duplicate(p_subresources)); - } - } else { - r->set(E.name, p); - } - } break; +Ref Resource::duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode) const { + ERR_FAIL_INDEX_V(p_deep_subresources_mode, RESOURCE_DEEP_DUPLICATE_MAX, Ref()); - default: { - r->set(E.name, p); - } + DuplicateRemapCacheT remap_cache; + bool started_session = false; + if (!thread_duplicate_remap_cache) { + thread_duplicate_remap_cache = &remap_cache; + 
started_session = true; + } + + DuplicateParams params; + params.deep = true; + params.subres_mode = p_deep_subresources_mode; + const Ref &dupe = _duplicate(params); + + if (started_session) { + thread_duplicate_remap_cache = nullptr; + } + + return dupe; +} + +Ref Resource::_duplicate_from_variant(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int p_recursion_count) const { + // A call without deep duplication would have been early-rejected at Variant::duplicate() unless it's the root call. + DEV_ASSERT(!(p_recursion_count > 0 && p_deep_subresources_mode == RESOURCE_DEEP_DUPLICATE_NONE)); + + // When duplicating from Variant, this function may be called multiple times from + // different parts of the data structure being copied. Therefore, we need to create + // a remap cache instance in a way that can be shared among all of the calls. + // Whatever Variant, Array or Dictionary that initiated the call chain will eventually + // claim it, when the stack unwinds up to the root call. + // One exception is that this is the root call. + + if (p_recursion_count == 0) { + if (p_deep) { + return duplicate_deep(p_deep_subresources_mode); + } else { + return duplicate(false); } } - return r; + if (thread_duplicate_remap_cache) { + Resource::DuplicateRemapCacheT::Iterator E = thread_duplicate_remap_cache->find(Ref(this)); + if (E) { + return E->value; + } + } else { + thread_duplicate_remap_cache = memnew(DuplicateRemapCacheT); + } + + DuplicateParams params; + params.deep = p_deep; + params.subres_mode = p_deep_subresources_mode; + + const Ref dupe = _duplicate(params); + + return dupe; +} + +void Resource::_teardown_duplicate_from_variant() { + if (thread_duplicate_remap_cache) { + memdelete(thread_duplicate_remap_cache); + thread_duplicate_remap_cache = nullptr; + } } void Resource::_set_path(const String &p_path) { @@ -585,7 +725,14 @@ void Resource::_bind_methods() { ClassDB::bind_method(D_METHOD("emit_changed"), &Resource::emit_changed); - ClassDB::bind_method(D_METHOD("duplicate", "subresources"), &Resource::duplicate, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("duplicate", "deep"), &Resource::duplicate, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("duplicate_deep", "deep_subresources_mode"), &Resource::duplicate_deep, DEFVAL(RESOURCE_DEEP_DUPLICATE_INTERNAL)); + + // For the bindings, it's much more natural to expose this enum from the Variant realm via Resource. 
+ ClassDB::bind_integer_constant(get_class_static(), StringName("ResourceDeepDuplicateMode"), "RESOURCE_DEEP_DUPLICATE_NONE", RESOURCE_DEEP_DUPLICATE_NONE); + ClassDB::bind_integer_constant(get_class_static(), StringName("ResourceDeepDuplicateMode"), "RESOURCE_DEEP_DUPLICATE_INTERNAL", RESOURCE_DEEP_DUPLICATE_INTERNAL); + ClassDB::bind_integer_constant(get_class_static(), StringName("ResourceDeepDuplicateMode"), "RESOURCE_DEEP_DUPLICATE_ALL", RESOURCE_DEEP_DUPLICATE_ALL); + ADD_SIGNAL(MethodInfo("changed")); ADD_SIGNAL(MethodInfo("setup_local_to_scene_requested")); diff --git a/core/io/resource.h b/core/io/resource.h index 3eb1e01fd8..22726f8929 100644 --- a/core/io/resource.h +++ b/core/io/resource.h @@ -59,6 +59,13 @@ public: static void register_custom_data_to_otdb() { ClassDB::add_resource_base_extension("res", get_class_static()); } virtual String get_base_extension() const { return "res"; } +protected: + struct DuplicateParams { + bool deep = false; + ResourceDeepDuplicateMode subres_mode = RESOURCE_DEEP_DUPLICATE_MAX; + Node *local_scene = nullptr; + }; + private: friend class ResBase; friend class ResourceCache; @@ -85,7 +92,10 @@ private: SelfList remapped_list; - void _dupe_sub_resources(Variant &r_variant, Node *p_for_scene, HashMap, Ref> &p_remap_cache); + using DuplicateRemapCacheT = HashMap, Ref>; + static thread_local inline DuplicateRemapCacheT *thread_duplicate_remap_cache = nullptr; + + Variant _duplicate_recursive(const Variant &p_variant, const DuplicateParams &p_params, uint32_t p_usage = 0) const; void _find_sub_resources(const Variant &p_variant, HashSet> &p_resources_found); protected: @@ -106,6 +116,8 @@ protected: GDVIRTUAL1C(_set_path_cache, String); GDVIRTUAL0(_reset_state); + virtual Ref _duplicate(const DuplicateParams &p_params) const; + public: static Node *(*_get_local_scene_func)(); //used by editor static void (*_update_configuration_warning)(); //used by editor @@ -133,8 +145,11 @@ public: void set_scene_unique_id(const String &p_id); String get_scene_unique_id() const; - virtual Ref duplicate(bool p_subresources = false) const; - Ref duplicate_for_local_scene(Node *p_for_scene, HashMap, Ref> &p_remap_cache); + Ref duplicate(bool p_deep = false) const; + Ref duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode = RESOURCE_DEEP_DUPLICATE_INTERNAL) const; + Ref _duplicate_from_variant(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int p_recursion_count) const; + static void _teardown_duplicate_from_variant(); + Ref duplicate_for_local_scene(Node *p_for_scene, HashMap, Ref> &p_remap_cache) const; void configure_for_local_scene(Node *p_for_scene, HashMap, Ref> &p_remap_cache); void set_local_to_scene(bool p_enable); @@ -170,6 +185,8 @@ public: ~Resource(); }; +VARIANT_ENUM_CAST(ResourceDeepDuplicateMode); + class ResourceCache { friend class Resource; friend class ResourceLoader; //need the lock diff --git a/core/io/resource_uid.cpp b/core/io/resource_uid.cpp index 964a975c88..7e4a4e6fe4 100644 --- a/core/io/resource_uid.cpp +++ b/core/io/resource_uid.cpp @@ -366,6 +366,10 @@ void ResourceUID::_bind_methods() { ClassDB::bind_method(D_METHOD("get_id_path", "id"), &ResourceUID::get_id_path); ClassDB::bind_method(D_METHOD("remove_id", "id"), &ResourceUID::remove_id); + ClassDB::bind_static_method("ResourceUID", D_METHOD("uid_to_path", "uid"), &ResourceUID::uid_to_path); + ClassDB::bind_static_method("ResourceUID", D_METHOD("path_to_uid", "path"), &ResourceUID::path_to_uid); + ClassDB::bind_static_method("ResourceUID", 
D_METHOD("ensure_path", "path_or_uid"), &ResourceUID::ensure_path); + BIND_CONSTANT(INVALID_ID) } ResourceUID *ResourceUID::singleton = nullptr; diff --git a/core/math/bvh.h b/core/math/bvh.h index c55fd5bf97..4fb2c21b26 100644 --- a/core/math/bvh.h +++ b/core/math/bvh.h @@ -771,7 +771,7 @@ private: // for collision pairing, // maintain a list of all items moved etc on each frame / tick - LocalVector changed_items; + LocalVector changed_items; uint32_t _tick = 1; // Start from 1 so items with 0 indicate never updated. class BVHLockedFunction { diff --git a/core/math/bvh_public.inc b/core/math/bvh_public.inc index fc1c67a21b..f0a8edb08a 100644 --- a/core/math/bvh_public.inc +++ b/core/math/bvh_public.inc @@ -202,7 +202,7 @@ void item_remove(BVHHandle p_handle) { // swap back and decrement for fast unordered remove _active_refs[active_ref_id] = ref_id_moved_back; - _active_refs.resize(_active_refs.size() - 1); + _active_refs.resize_uninitialized(_active_refs.size() - 1); // keep the moved active reference up to date _extra[ref_id_moved_back].active_ref_id = active_ref_id; diff --git a/core/math/bvh_structs.inc b/core/math/bvh_structs.inc index 6326cd63ef..771fc13835 100644 --- a/core/math/bvh_structs.inc +++ b/core/math/bvh_structs.inc @@ -172,13 +172,13 @@ PooledList _leaves; // we can maintain an un-ordered list of which references are active, // in order to do a slow incremental optimize of the tree over each frame. // This will work best if dynamic objects and static objects are in a different tree. -LocalVector _active_refs; +LocalVector _active_refs; uint32_t _current_active_ref = 0; // instead of translating directly to the userdata output, // we keep an intermediate list of hits as reference IDs, which can be used // for pairing collision detection -LocalVector _cull_hits; +LocalVector _cull_hits; // We can now have a user definable number of trees. // This allows using e.g. 
a non-pairable and pairable tree, diff --git a/core/math/color.cpp b/core/math/color.cpp index aadd345503..ce77ffed72 100644 --- a/core/math/color.cpp +++ b/core/math/color.cpp @@ -248,6 +248,19 @@ void Color::set_ok_hsl(float p_h, float p_s, float p_l, float p_alpha) { a = c.a; } +void Color::set_ok_hsv(float p_h, float p_s, float p_v, float p_alpha) { + ok_color::HSV hsv; + hsv.h = p_h; + hsv.s = p_s; + hsv.v = p_v; + ok_color::RGB rgb = ok_color::okhsv_to_srgb(hsv); + Color c = Color(rgb.r, rgb.g, rgb.b, p_alpha).clamp(); + r = c.r; + g = c.g; + b = c.b; + a = c.a; +} + bool Color::is_equal_approx(const Color &p_color) const { return Math::is_equal_approx(r, p_color.r) && Math::is_equal_approx(g, p_color.g) && Math::is_equal_approx(b, p_color.b) && Math::is_equal_approx(a, p_color.a); } @@ -478,6 +491,12 @@ Color Color::from_ok_hsl(float p_h, float p_s, float p_l, float p_alpha) { return c; } +Color Color::from_ok_hsv(float p_h, float p_s, float p_l, float p_alpha) { + Color c; + c.set_ok_hsv(p_h, p_s, p_l, p_alpha); + return c; +} + float Color::get_ok_hsl_h() const { ok_color::RGB rgb; rgb.r = r; diff --git a/core/math/color.h b/core/math/color.h index 4a7839b27c..ab4c139bd1 100644 --- a/core/math/color.h +++ b/core/math/color.h @@ -64,6 +64,7 @@ struct [[nodiscard]] Color { float get_ok_hsl_s() const; float get_ok_hsl_l() const; void set_ok_hsl(float p_h, float p_s, float p_l, float p_alpha = 1.0f); + void set_ok_hsv(float p_h, float p_s, float p_v, float p_alpha = 1.0f); _FORCE_INLINE_ float &operator[](int p_idx) { return components[p_idx]; @@ -216,6 +217,7 @@ struct [[nodiscard]] Color { static Color from_string(const String &p_string, const Color &p_default); static Color from_hsv(float p_h, float p_s, float p_v, float p_alpha = 1.0f); static Color from_ok_hsl(float p_h, float p_s, float p_l, float p_alpha = 1.0f); + static Color from_ok_hsv(float p_h, float p_s, float p_l, float p_alpha = 1.0f); static Color from_rgbe9995(uint32_t p_rgbe); static Color from_rgba8(int64_t p_r8, int64_t p_g8, int64_t p_b8, int64_t p_a8 = 255); diff --git a/core/object/class_db.h b/core/object/class_db.h index 64c822c059..a81b643af2 100644 --- a/core/object/class_db.h +++ b/core/object/class_db.h @@ -533,24 +533,9 @@ public: #ifdef DEBUG_ENABLED -_FORCE_INLINE_ void errarray_add_str(Vector &arr) { -} - -_FORCE_INLINE_ void errarray_add_str(Vector &arr, const Error &p_err) { - arr.push_back(p_err); -} - -template -_FORCE_INLINE_ void errarray_add_str(Vector &arr, const Error &p_err, P... p_args) { - arr.push_back(p_err); - errarray_add_str(arr, p_args...); -} - template _FORCE_INLINE_ Vector errarray(P... p_args) { - Vector arr; - errarray_add_str(arr, p_args...); - return arr; + return Vector({ p_args... }); } #define BIND_METHOD_ERR_RETURN_DOC(m_method, ...) 
\ diff --git a/core/object/object.cpp b/core/object/object.cpp index 68246e8e4b..043db603e3 100644 --- a/core/object/object.cpp +++ b/core/object/object.cpp @@ -1926,6 +1926,7 @@ void Object::_bind_methods() { BIND_ENUM_CONSTANT(CONNECT_PERSIST); BIND_ENUM_CONSTANT(CONNECT_ONE_SHOT); BIND_ENUM_CONSTANT(CONNECT_REFERENCE_COUNTED); + BIND_ENUM_CONSTANT(CONNECT_APPEND_SOURCE_OBJECT); } void Object::set_deferred(const StringName &p_property, const Variant &p_value) { diff --git a/core/object/object.h b/core/object/object.h index 3c9a019b4f..efcf2a0c1d 100644 --- a/core/object/object.h +++ b/core/object/object.h @@ -95,7 +95,7 @@ enum PropertyHint { PROPERTY_HINT_TOOL_BUTTON, PROPERTY_HINT_ONESHOT, ///< the property will be changed by self after setting, such as AudioStreamPlayer.playing, Particles.emitting. PROPERTY_HINT_NO_NODEPATH, /// < this property will not contain a NodePath, regardless of type (Array, Dictionary, List, etc.). Needed for SceneTreeDock. - PROPERTY_HINT_GROUP_ENABLE, ///< used to make the property's group checkable. Only use for boolean types. + PROPERTY_HINT_GROUP_ENABLE, ///< used to make the property's group checkable. Only use for boolean types. Optional "feature" hint string force hides anything inside when unchecked. PROPERTY_HINT_INPUT_NAME, PROPERTY_HINT_MAX, }; @@ -397,23 +397,95 @@ struct ObjectGDExtension { * much alone defines the object model. */ -// This is a barebones version of GDCLASS, -// only intended for simple classes deriving from Object -// so that they can support the `Object::cast_to()` method. -#define GDSOFTCLASS(m_class, m_inherits) \ -public: \ - using self_type = m_class; \ - using super_type = m_inherits; \ - static _FORCE_INLINE_ void *get_class_ptr_static() { \ - static int ptr; \ - return &ptr; \ - } \ - virtual bool is_class_ptr(void *p_ptr) const override { \ - return (p_ptr == get_class_ptr_static()) || m_inherits::is_class_ptr(p_ptr); \ - } \ - \ +/// `GDSOFTCLASS` provides `Object` functionality, such as being able to use `Object::cast_to()`. +/// Use this for `Object` subclasses that are not registered in `ClassDB` (use `GDCLASS` otherwise). 
+#define GDSOFTCLASS(m_class, m_inherits) \ +public: \ + using self_type = m_class; \ + using super_type = m_inherits; \ + static _FORCE_INLINE_ void *get_class_ptr_static() { \ + static int ptr; \ + return &ptr; \ + } \ + virtual bool is_class_ptr(void *p_ptr) const override { \ + return (p_ptr == get_class_ptr_static()) || m_inherits::is_class_ptr(p_ptr); \ + } \ + \ +protected: \ + _FORCE_INLINE_ bool (Object::*_get_get() const)(const StringName &p_name, Variant &) const { \ + return (bool (Object::*)(const StringName &, Variant &) const) & m_class::_get; \ + } \ + virtual bool _getv(const StringName &p_name, Variant &r_ret) const override { \ + if (m_class::_get_get() != m_inherits::_get_get()) { \ + if (_get(p_name, r_ret)) { \ + return true; \ + } \ + } \ + return m_inherits::_getv(p_name, r_ret); \ + } \ + _FORCE_INLINE_ bool (Object::*_get_set() const)(const StringName &p_name, const Variant &p_property) { \ + return (bool (Object::*)(const StringName &, const Variant &)) & m_class::_set; \ + } \ + virtual bool _setv(const StringName &p_name, const Variant &p_property) override { \ + if (m_inherits::_setv(p_name, p_property)) { \ + return true; \ + } \ + if (m_class::_get_set() != m_inherits::_get_set()) { \ + return _set(p_name, p_property); \ + } \ + return false; \ + } \ + _FORCE_INLINE_ void (Object::*_get_validate_property() const)(PropertyInfo & p_property) const { \ + return (void (Object::*)(PropertyInfo &) const) & m_class::_validate_property; \ + } \ + virtual void _validate_propertyv(PropertyInfo &p_property) const override { \ + m_inherits::_validate_propertyv(p_property); \ + if (m_class::_get_validate_property() != m_inherits::_get_validate_property()) { \ + _validate_property(p_property); \ + } \ + } \ + _FORCE_INLINE_ bool (Object::*_get_property_can_revert() const)(const StringName &p_name) const { \ + return (bool (Object::*)(const StringName &) const) & m_class::_property_can_revert; \ + } \ + virtual bool _property_can_revertv(const StringName &p_name) const override { \ + if (m_class::_get_property_can_revert() != m_inherits::_get_property_can_revert()) { \ + if (_property_can_revert(p_name)) { \ + return true; \ + } \ + } \ + return m_inherits::_property_can_revertv(p_name); \ + } \ + _FORCE_INLINE_ bool (Object::*_get_property_get_revert() const)(const StringName &p_name, Variant &) const { \ + return (bool (Object::*)(const StringName &, Variant &) const) & m_class::_property_get_revert; \ + } \ + virtual bool _property_get_revertv(const StringName &p_name, Variant &r_ret) const override { \ + if (m_class::_get_property_get_revert() != m_inherits::_get_property_get_revert()) { \ + if (_property_get_revert(p_name, r_ret)) { \ + return true; \ + } \ + } \ + return m_inherits::_property_get_revertv(p_name, r_ret); \ + } \ + _FORCE_INLINE_ void (Object::*_get_notification() const)(int) { \ + return (void (Object::*)(int)) & m_class::_notification; \ + } \ + virtual void _notification_forwardv(int p_notification) override { \ + m_inherits::_notification_forwardv(p_notification); \ + if (m_class::_get_notification() != m_inherits::_get_notification()) { \ + _notification(p_notification); \ + } \ + } \ + virtual void _notification_backwardv(int p_notification) override { \ + if (m_class::_get_notification() != m_inherits::_get_notification()) { \ + _notification(p_notification); \ + } \ + m_inherits::_notification_backwardv(p_notification); \ + } \ + \ private: +/// `GDSOFTCLASS` provides `Object` functionality, such as being able to use `Object::cast_to()`. 
+/// Use this for `Object` subclasses that are registered in `ObjectDB` (use `GDSOFTCLASS` otherwise). #define GDCLASS(m_class, m_inherits) \ GDSOFTCLASS(m_class, m_inherits) \ private: \ @@ -468,29 +540,6 @@ protected: virtual void _initialize_classv() override { \ initialize_class(); \ } \ - _FORCE_INLINE_ bool (Object::*_get_get() const)(const StringName &p_name, Variant &) const { \ - return (bool (Object::*)(const StringName &, Variant &) const) & m_class::_get; \ - } \ - virtual bool _getv(const StringName &p_name, Variant &r_ret) const override { \ - if (m_class::_get_get() != m_inherits::_get_get()) { \ - if (_get(p_name, r_ret)) { \ - return true; \ - } \ - } \ - return m_inherits::_getv(p_name, r_ret); \ - } \ - _FORCE_INLINE_ bool (Object::*_get_set() const)(const StringName &p_name, const Variant &p_property) { \ - return (bool (Object::*)(const StringName &, const Variant &)) & m_class::_set; \ - } \ - virtual bool _setv(const StringName &p_name, const Variant &p_property) override { \ - if (m_inherits::_setv(p_name, p_property)) { \ - return true; \ - } \ - if (m_class::_get_set() != m_inherits::_get_set()) { \ - return _set(p_name, p_property); \ - } \ - return false; \ - } \ _FORCE_INLINE_ void (Object::*_get_get_property_list() const)(List * p_list) const { \ return (void (Object::*)(List *) const) & m_class::_get_property_list; \ } \ @@ -506,52 +555,6 @@ protected: if (p_reversed) { \ m_inherits::_get_property_listv(p_list, p_reversed); \ } \ - } \ - _FORCE_INLINE_ void (Object::*_get_validate_property() const)(PropertyInfo & p_property) const { \ - return (void (Object::*)(PropertyInfo &) const) & m_class::_validate_property; \ - } \ - virtual void _validate_propertyv(PropertyInfo &p_property) const override { \ - m_inherits::_validate_propertyv(p_property); \ - if (m_class::_get_validate_property() != m_inherits::_get_validate_property()) { \ - _validate_property(p_property); \ - } \ - } \ - _FORCE_INLINE_ bool (Object::*_get_property_can_revert() const)(const StringName &p_name) const { \ - return (bool (Object::*)(const StringName &) const) & m_class::_property_can_revert; \ - } \ - virtual bool _property_can_revertv(const StringName &p_name) const override { \ - if (m_class::_get_property_can_revert() != m_inherits::_get_property_can_revert()) { \ - if (_property_can_revert(p_name)) { \ - return true; \ - } \ - } \ - return m_inherits::_property_can_revertv(p_name); \ - } \ - _FORCE_INLINE_ bool (Object::*_get_property_get_revert() const)(const StringName &p_name, Variant &) const { \ - return (bool (Object::*)(const StringName &, Variant &) const) & m_class::_property_get_revert; \ - } \ - virtual bool _property_get_revertv(const StringName &p_name, Variant &r_ret) const override { \ - if (m_class::_get_property_get_revert() != m_inherits::_get_property_get_revert()) { \ - if (_property_get_revert(p_name, r_ret)) { \ - return true; \ - } \ - } \ - return m_inherits::_property_get_revertv(p_name, r_ret); \ - } \ - _FORCE_INLINE_ void (Object::*_get_notification() const)(int) { \ - return (void (Object::*)(int)) & m_class::_notification; \ - } \ - virtual void _notification_forwardv(int p_notification) override { \ - m_inherits::_notification_forwardv(p_notification); \ - if (m_class::_get_notification() != m_inherits::_get_notification()) { \ - _notification(p_notification); \ - } \ - } \ - virtual void _notification_backwardv(int p_notification) override { \ - if (m_class::_get_notification() != m_inherits::_get_notification()) { \ - _notification(p_notification); \ 
- } \ - m_inherits::_notification_backwardv(p_notification); \ } \ \ private: @@ -572,10 +575,11 @@ public: enum ConnectFlags { CONNECT_DEFERRED = 1, - CONNECT_PERSIST = 2, // hint for scene to save this connection + CONNECT_PERSIST = 2, // Hint for scene to save this connection. CONNECT_ONE_SHOT = 4, CONNECT_REFERENCE_COUNTED = 8, - CONNECT_INHERITED = 16, // Used in editor builds. + CONNECT_APPEND_SOURCE_OBJECT = 16, + CONNECT_INHERITED = 32, // Used in editor builds. }; struct Connection { diff --git a/core/object/script_language.cpp b/core/object/script_language.cpp index 086a0bde8f..e1867ae87b 100644 --- a/core/object/script_language.cpp +++ b/core/object/script_language.cpp @@ -174,7 +174,7 @@ void Script::_bind_methods() { ClassDB::bind_method(D_METHOD("is_tool"), &Script::is_tool); ClassDB::bind_method(D_METHOD("is_abstract"), &Script::is_abstract); - ClassDB::bind_method(D_METHOD("get_rpc_config"), &Script::get_rpc_config); + ClassDB::bind_method(D_METHOD("get_rpc_config"), &Script::_get_rpc_config_bind); ADD_PROPERTY(PropertyInfo(Variant::STRING, "source_code", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NONE), "set_source_code", "get_source_code"); } diff --git a/core/object/script_language.h b/core/object/script_language.h index eb3bdfa5ec..2f23934020 100644 --- a/core/object/script_language.h +++ b/core/object/script_language.h @@ -137,6 +137,10 @@ protected: void _set_debugger_break_language(); + Variant _get_rpc_config_bind() const { + return get_rpc_config().duplicate(true); + } + public: virtual void reload_from_file() override; @@ -193,7 +197,7 @@ public: virtual bool is_placeholder_fallback_enabled() const { return false; } - virtual Variant get_rpc_config() const = 0; + virtual const Variant get_rpc_config() const = 0; Script() {} }; diff --git a/core/object/script_language_extension.h b/core/object/script_language_extension.h index 26cad36cb6..a3e669a9f5 100644 --- a/core/object/script_language_extension.h +++ b/core/object/script_language_extension.h @@ -211,7 +211,7 @@ public: GDVIRTUAL0RC_REQUIRED(Variant, _get_rpc_config) - virtual Variant get_rpc_config() const override { + virtual const Variant get_rpc_config() const override { Variant ret; GDVIRTUAL_CALL(_get_rpc_config, ret); return ret; diff --git a/core/os/memory.h b/core/os/memory.h index 366eefda21..3d455548fd 100644 --- a/core/os/memory.h +++ b/core/os/memory.h @@ -198,19 +198,15 @@ T *memnew_arr_template(size_t p_elements) { } // Fast alternative to a loop constructor pattern. -template +template _FORCE_INLINE_ void memnew_arr_placement(T *p_start, size_t p_num) { - if constexpr (std::is_trivially_constructible_v && !p_ensure_zero) { - // Don't need to do anything :) - (void)p_start; - (void)p_num; - } else if constexpr (is_zero_constructible_v) { + if constexpr (is_zero_constructible_v) { // Can optimize with memset. memset(static_cast(p_start), 0, p_num * sizeof(T)); } else { // Need to use a for loop. for (size_t i = 0; i < p_num; i++) { - memnew_placement(p_start + i, T); + memnew_placement(p_start + i, T()); } } } diff --git a/core/os/spin_lock.h b/core/os/spin_lock.h index fb1ea3a5e0..a481240d4f 100644 --- a/core/os/spin_lock.h +++ b/core/os/spin_lock.h @@ -83,7 +83,7 @@ _ALWAYS_INLINE_ static void _cpu_pause() { __builtin_ia32_pause(); #elif defined(__arm__) || defined(__aarch64__) // ARM. asm volatile("yield"); -#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) // PowerPC. +#elif defined(__powerpc__) // PowerPC. asm volatile("or 27,27,27"); #elif defined(__riscv) // RISC-V. 
asm volatile(".insn i 0x0F, 0, x0, x0, 0x010"); diff --git a/core/os/time.cpp b/core/os/time.cpp index 53d8e48396..ead49fbe21 100644 --- a/core/os/time.cpp +++ b/core/os/time.cpp @@ -39,6 +39,33 @@ #define IS_LEAP_YEAR(year) (!((year) % 4) && (((year) % 100) || !((year) % 400))) #define YEAR_SIZE(year) (IS_LEAP_YEAR(year) ? 366 : 365) +static constexpr int64_t total_leap_days(int64_t p_year) { + if (p_year > 0) { + --p_year; + return 1 + (p_year / 4 - p_year / 100 + p_year / 400); + } + + return p_year / 4 - p_year / 100 + p_year / 400; +} + +static constexpr int64_t year_to_days(int64_t p_year) { + return p_year * 365 + total_leap_days(p_year); +} + +static constexpr int64_t days_to_year(int64_t p_days) { + int64_t year = 400 * p_days / year_to_days(400); + if (year < 0) { + --year; + } + if (year_to_days(year) > p_days) { + --year; + } + if (year_to_days(year + 1) <= p_days) { + ++year; + } + return year; +} + #define YEAR_KEY "year" #define MONTH_KEY "month" #define DAY_KEY "day" @@ -74,16 +101,10 @@ static const uint8_t MONTH_DAYS_TABLE[2][12] = { int64_t day_number = Math::floor(p_unix_time_val / (double)SECONDS_PER_DAY); \ { \ int64_t day_number_copy = day_number; \ - year = UNIX_EPOCH_YEAR_AD; \ + day_number_copy += year_to_days(UNIX_EPOCH_YEAR_AD); \ + year = days_to_year(day_number_copy); \ + day_number_copy -= year_to_days(year); \ uint8_t month_zero_index = 0; \ - while (day_number_copy >= YEAR_SIZE(year)) { \ - day_number_copy -= YEAR_SIZE(year); \ - year++; \ - } \ - while (day_number_copy < 0) { \ - year--; \ - day_number_copy += YEAR_SIZE(year); \ - } \ /* After the above, day_number now represents the day of the year (0-index). */ \ while (day_number_copy >= MONTH_DAYS_TABLE[IS_LEAP_YEAR(year)][month_zero_index]) { \ day_number_copy -= MONTH_DAYS_TABLE[IS_LEAP_YEAR(year)][month_zero_index]; \ @@ -118,15 +139,8 @@ static const uint8_t MONTH_DAYS_TABLE[2][12] = { day_number += MONTH_DAYS_TABLE[IS_LEAP_YEAR(year)][i]; \ } \ /* Add the days in the years to day_number. */ \ - if (year >= UNIX_EPOCH_YEAR_AD) { \ - for (int64_t iyear = UNIX_EPOCH_YEAR_AD; iyear < year; iyear++) { \ - day_number += YEAR_SIZE(iyear); \ - } \ - } else { \ - for (int64_t iyear = UNIX_EPOCH_YEAR_AD - 1; iyear >= year; iyear--) { \ - day_number -= YEAR_SIZE(iyear); \ - } \ - } + day_number += year_to_days(year); \ + day_number -= year_to_days(UNIX_EPOCH_YEAR_AD); #define PARSE_ISO8601_STRING(ret) \ int64_t year = UNIX_EPOCH_YEAR_AD; \ diff --git a/core/string/fuzzy_search.cpp b/core/string/fuzzy_search.cpp index 22343aa267..31484f4124 100644 --- a/core/string/fuzzy_search.cpp +++ b/core/string/fuzzy_search.cpp @@ -267,12 +267,19 @@ void FuzzySearch::sort_and_filter(Vector &p_results) const { } void FuzzySearch::set_query(const String &p_query) { - tokens.clear(); - for (const String &string : p_query.split(" ", false)) { - tokens.append({ static_cast(tokens.size()), string }); - } + set_query(p_query, !p_query.is_lowercase()); +} - case_sensitive = !p_query.is_lowercase(); +void FuzzySearch::set_query(const String &p_query, bool p_case_sensitive) { + tokens.clear(); + case_sensitive = p_case_sensitive; + + for (const String &string : p_query.split(" ", false)) { + tokens.append({ + static_cast(tokens.size()), + p_case_sensitive ? 
string : string.to_lower(), + }); + } struct TokenComparator { bool operator()(const FuzzySearchToken &A, const FuzzySearchToken &B) const { diff --git a/core/string/fuzzy_search.h b/core/string/fuzzy_search.h index be58f3b6fd..355c789486 100644 --- a/core/string/fuzzy_search.h +++ b/core/string/fuzzy_search.h @@ -86,16 +86,17 @@ public: class FuzzySearch { Vector tokens; + bool case_sensitive = false; void sort_and_filter(Vector &p_results) const; public: int start_offset = 0; - bool case_sensitive = false; int max_results = 100; int max_misses = 2; bool allow_subsequences = true; void set_query(const String &p_query); + void set_query(const String &p_query, bool p_case_sensitive); bool search(const String &p_target, FuzzySearchResult &p_result) const; void search_all(const PackedStringArray &p_targets, Vector &p_results) const; }; diff --git a/core/string/translation.cpp b/core/string/translation.cpp index 69366392ee..80045c2b7b 100644 --- a/core/string/translation.cpp +++ b/core/string/translation.cpp @@ -32,7 +32,6 @@ #include "translation.h" -#include "core/os/os.h" #include "core/os/thread.h" #include "core/string/translation_server.h" @@ -76,21 +75,6 @@ void Translation::_set_messages(const Dictionary &p_messages) { void Translation::set_locale(const String &p_locale) { locale = TranslationServer::get_singleton()->standardize_locale(p_locale); - - if (Thread::is_main_thread()) { - _notify_translation_changed_if_applies(); - } else { - // This has to happen on the main thread (bypassing the ResourceLoader per-thread call queue) - // because it interacts with the generally non-thread-safe window management, leading to - // different issues across platforms otherwise. - MessageQueue::get_main_singleton()->push_callable(callable_mp(this, &Translation::_notify_translation_changed_if_applies)); - } -} - -void Translation::_notify_translation_changed_if_applies() { - if (OS::get_singleton()->get_main_loop() && TranslationServer::get_singleton()->get_loaded_locales().has(get_locale())) { - OS::get_singleton()->get_main_loop()->notification(MainLoop::NOTIFICATION_TRANSLATION_CHANGED); - } } void Translation::add_message(const StringName &p_src_text, const StringName &p_xlated_text, const StringName &p_context) { diff --git a/core/string/translation.h b/core/string/translation.h index d05a41c7e2..5319e5715c 100644 --- a/core/string/translation.h +++ b/core/string/translation.h @@ -47,8 +47,6 @@ class Translation : public Resource { virtual Dictionary _get_messages() const; virtual void _set_messages(const Dictionary &p_messages); - void _notify_translation_changed_if_applies(); - protected: static void _bind_methods(); diff --git a/core/string/translation_domain.cpp b/core/string/translation_domain.cpp index 4a53b3fd61..34a59a7d81 100644 --- a/core/string/translation_domain.cpp +++ b/core/string/translation_domain.cpp @@ -289,7 +289,11 @@ void TranslationDomain::clear() { } StringName TranslationDomain::translate(const StringName &p_message, const StringName &p_context) const { - const String &locale = TranslationServer::get_singleton()->get_locale(); + if (!enabled) { + return p_message; + } + + const String &locale = locale_override.is_empty() ? 
TranslationServer::get_singleton()->get_locale() : locale_override; StringName res = get_message_from_translations(locale, p_message, p_context); const String &fallback = TranslationServer::get_singleton()->get_fallback_locale(); @@ -304,7 +308,11 @@ StringName TranslationDomain::translate(const StringName &p_message, const Strin } StringName TranslationDomain::translate_plural(const StringName &p_message, const StringName &p_message_plural, int p_n, const StringName &p_context) const { - const String &locale = TranslationServer::get_singleton()->get_locale(); + if (!enabled) { + return p_n == 1 ? p_message : p_message_plural; + } + + const String &locale = locale_override.is_empty() ? TranslationServer::get_singleton()->get_locale() : locale_override; StringName res = get_message_from_translations(locale, p_message, p_message_plural, p_n, p_context); const String &fallback = TranslationServer::get_singleton()->get_fallback_locale(); @@ -321,6 +329,22 @@ StringName TranslationDomain::translate_plural(const StringName &p_message, cons return res; } +String TranslationDomain::get_locale_override() const { + return locale_override; +} + +void TranslationDomain::set_locale_override(const String &p_locale) { + locale_override = p_locale.is_empty() ? p_locale : TranslationServer::get_singleton()->standardize_locale(p_locale); +} + +bool TranslationDomain::is_enabled() const { + return enabled; +} + +void TranslationDomain::set_enabled(bool p_enabled) { + enabled = p_enabled; +} + bool TranslationDomain::is_pseudolocalization_enabled() const { return pseudolocalization.enabled; } @@ -426,6 +450,10 @@ void TranslationDomain::_bind_methods() { ClassDB::bind_method(D_METHOD("clear"), &TranslationDomain::clear); ClassDB::bind_method(D_METHOD("translate", "message", "context"), &TranslationDomain::translate, DEFVAL(StringName())); ClassDB::bind_method(D_METHOD("translate_plural", "message", "message_plural", "n", "context"), &TranslationDomain::translate_plural, DEFVAL(StringName())); + ClassDB::bind_method(D_METHOD("get_locale_override"), &TranslationDomain::get_locale_override); + ClassDB::bind_method(D_METHOD("set_locale_override", "locale"), &TranslationDomain::set_locale_override); + ClassDB::bind_method(D_METHOD("is_enabled"), &TranslationDomain::is_enabled); + ClassDB::bind_method(D_METHOD("set_enabled", "enabled"), &TranslationDomain::set_enabled); ClassDB::bind_method(D_METHOD("is_pseudolocalization_enabled"), &TranslationDomain::is_pseudolocalization_enabled); ClassDB::bind_method(D_METHOD("set_pseudolocalization_enabled", "enabled"), &TranslationDomain::set_pseudolocalization_enabled); @@ -447,6 +475,7 @@ void TranslationDomain::_bind_methods() { ClassDB::bind_method(D_METHOD("set_pseudolocalization_suffix", "suffix"), &TranslationDomain::set_pseudolocalization_suffix); ClassDB::bind_method(D_METHOD("pseudolocalize", "message"), &TranslationDomain::pseudolocalize); + ADD_PROPERTY(PropertyInfo(Variant::Type::BOOL, "enabled"), "set_enabled", "is_enabled"); ADD_PROPERTY(PropertyInfo(Variant::Type::BOOL, "pseudolocalization_enabled"), "set_pseudolocalization_enabled", "is_pseudolocalization_enabled"); ADD_PROPERTY(PropertyInfo(Variant::Type::BOOL, "pseudolocalization_accents_enabled"), "set_pseudolocalization_accents_enabled", "is_pseudolocalization_accents_enabled"); ADD_PROPERTY(PropertyInfo(Variant::Type::BOOL, "pseudolocalization_double_vowels_enabled"), "set_pseudolocalization_double_vowels_enabled", "is_pseudolocalization_double_vowels_enabled"); diff --git 
a/core/string/translation_domain.h b/core/string/translation_domain.h index 0171b31d3c..b7bde1b221 100644 --- a/core/string/translation_domain.h +++ b/core/string/translation_domain.h @@ -51,6 +51,9 @@ class TranslationDomain : public RefCounted { String suffix = "]"; }; + bool enabled = true; + + String locale_override; HashSet> translations; PseudolocalizationConfig pseudolocalization; @@ -81,6 +84,12 @@ public: StringName translate(const StringName &p_message, const StringName &p_context) const; StringName translate_plural(const StringName &p_message, const StringName &p_message_plural, int p_n, const StringName &p_context) const; + String get_locale_override() const; + void set_locale_override(const String &p_locale); + + bool is_enabled() const; + void set_enabled(bool p_enabled); + bool is_pseudolocalization_enabled() const; void set_pseudolocalization_enabled(bool p_enabled); bool is_pseudolocalization_accents_enabled() const; diff --git a/core/string/translation_server.cpp b/core/string/translation_server.cpp index 3c00538931..739daae552 100644 --- a/core/string/translation_server.cpp +++ b/core/string/translation_server.cpp @@ -408,21 +408,10 @@ void TranslationServer::clear() { } StringName TranslationServer::translate(const StringName &p_message, const StringName &p_context) const { - if (!enabled) { - return p_message; - } - return main_domain->translate(p_message, p_context); } StringName TranslationServer::translate_plural(const StringName &p_message, const StringName &p_message_plural, int p_n, const StringName &p_context) const { - if (!enabled) { - if (p_n == 1) { - return p_message; - } - return p_message_plural; - } - return main_domain->translate_plural(p_message, p_message_plural, p_n, p_context); } diff --git a/core/string/translation_server.h b/core/string/translation_server.h index 29e3067bb2..ef256d3d01 100644 --- a/core/string/translation_server.h +++ b/core/string/translation_server.h @@ -49,8 +49,6 @@ class TranslationServer : public Object { mutable HashMap locale_compare_cache; - bool enabled = true; - static inline TranslationServer *singleton = nullptr; static void _bind_methods(); @@ -98,11 +96,9 @@ class TranslationServer : public Object { public: _FORCE_INLINE_ static TranslationServer *get_singleton() { return singleton; } + Ref get_main_domain() const { return main_domain; } Ref get_editor_domain() const { return editor_domain; } - void set_enabled(bool p_enabled) { enabled = p_enabled; } - _FORCE_INLINE_ bool is_enabled() const { return enabled; } - void set_locale(const String &p_locale); String get_locale() const; String get_fallback_locale() const; diff --git a/core/string/ustring.h b/core/string/ustring.h index c6fa626066..4a07edf5da 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -184,7 +184,7 @@ public: _FORCE_INLINE_ operator Span() const { return Span(ptr(), length()); } _FORCE_INLINE_ Span span() const { return Span(ptr(), length()); } - _FORCE_INLINE_ Error resize(int p_size) { return _cowdata.resize(p_size); } + _FORCE_INLINE_ Error resize(int p_size) { return _cowdata.template resize(p_size); } _FORCE_INLINE_ T get(int p_index) const { return _cowdata.get(p_index); } _FORCE_INLINE_ void set(int p_index, const T &p_elem) { _cowdata.set(p_index, p_elem); } @@ -326,7 +326,7 @@ public: _FORCE_INLINE_ char32_t get(int p_index) const { return _cowdata.get(p_index); } _FORCE_INLINE_ void set(int p_index, const char32_t &p_elem) { _cowdata.set(p_index, p_elem); } - Error resize(int p_size) { return _cowdata.resize(p_size); } + 
Error resize(int p_size) { return _cowdata.resize(p_size); } _FORCE_INLINE_ const char32_t &operator[](int p_index) const { if (unlikely(p_index == _cowdata.size())) { @@ -795,22 +795,7 @@ _FORCE_INLINE_ String ETRN(const String &p_text, const String &p_text_plural, in bool select_word(const String &p_s, int p_col, int &r_beg, int &r_end); -_FORCE_INLINE_ void sarray_add_str(Vector &arr) { -} - -_FORCE_INLINE_ void sarray_add_str(Vector &arr, const String &p_str) { - arr.push_back(p_str); -} - -template -_FORCE_INLINE_ void sarray_add_str(Vector &arr, const String &p_str, P... p_args) { - arr.push_back(p_str); - sarray_add_str(arr, p_args...); -} - template _FORCE_INLINE_ Vector sarray(P... p_args) { - Vector arr; - sarray_add_str(arr, p_args...); - return arr; + return Vector({ String(p_args)... }); } diff --git a/core/templates/cowdata.h b/core/templates/cowdata.h index a95d784b8d..ad4bd748d7 100644 --- a/core/templates/cowdata.h +++ b/core/templates/cowdata.h @@ -210,7 +210,7 @@ public: return _ptr[p_index]; } - template + template Error resize(Size p_size); _FORCE_INLINE_ void remove_at(Size p_index) { @@ -284,6 +284,12 @@ void CowData::_unref() { // Free memory. Memory::free_static((uint8_t *)prev_ptr - DATA_OFFSET, false); + +#ifdef DEBUG_ENABLED + // If any destructors access us through pointers, it is a bug. + // We can't really test for that, but we can at least check no items have been added. + ERR_FAIL_COND_MSG(_ptr != nullptr, "Internal bug, please report: CowData was modified during destruction."); +#endif } template @@ -368,7 +374,7 @@ Error CowData::_fork_allocate(USize p_size) { } template -template +template Error CowData::resize(Size p_size) { ERR_FAIL_COND_V(p_size < 0, ERR_INVALID_PARAMETER); @@ -382,8 +388,12 @@ Error CowData::resize(Size p_size) { return error; } - if (p_size > prev_size) { - memnew_arr_placement(_ptr + prev_size, p_size - prev_size); + if constexpr (p_initialize) { + if (p_size > prev_size) { + memnew_arr_placement(_ptr + prev_size, p_size - prev_size); + } + } else { + static_assert(std::is_trivially_destructible_v); } return OK; diff --git a/core/templates/fixed_vector.h b/core/templates/fixed_vector.h index 10f2a62599..f0e130c37d 100644 --- a/core/templates/fixed_vector.h +++ b/core/templates/fixed_vector.h @@ -109,7 +109,7 @@ public: constexpr Error resize_initialized(uint32_t p_size) { if (p_size > _size) { ERR_FAIL_COND_V(p_size > CAPACITY, ERR_OUT_OF_MEMORY); - memnew_arr_placement(ptr() + _size, p_size - _size); + memnew_arr_placement(ptr() + _size, p_size - _size); } else if (p_size < _size) { if constexpr (!std::is_trivially_destructible_v) { for (uint32_t i = p_size; i < _size; i++) { diff --git a/core/templates/hash_map.h b/core/templates/hash_map.h index 5203e0344a..c78a85daa5 100644 --- a/core/templates/hash_map.h +++ b/core/templates/hash_map.h @@ -93,9 +93,19 @@ private: return hash; } + _FORCE_INLINE_ static constexpr void _increment_mod(uint32_t &r_pos, const uint32_t p_capacity) { + r_pos++; + // `if` is faster than both fastmod and mod. 
+ if (unlikely(r_pos == p_capacity)) { + r_pos = 0; + } + } + static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) { const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity); - return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity); + const uint32_t distance_pos = p_pos - original_pos + p_capacity; + // At most p_capacity over 0, so we can use an if (faster than fastmod). + return distance_pos >= p_capacity ? distance_pos - p_capacity : distance_pos; } bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const { @@ -123,7 +133,7 @@ private: return true; } - pos = fastmod((pos + 1), capacity_inv, capacity); + _increment_mod(pos, capacity); distance++; } } @@ -154,7 +164,7 @@ private: distance = existing_probe_len; } - pos = fastmod((pos + 1), capacity_inv, capacity); + _increment_mod(pos, capacity); distance++; } } @@ -351,7 +361,7 @@ public: SWAP(hashes[next_pos], hashes[pos]); SWAP(elements[next_pos], elements[pos]); pos = next_pos; - next_pos = fastmod((pos + 1), capacity_inv, capacity); + _increment_mod(next_pos, capacity); } hashes[pos] = EMPTY_HASH; @@ -400,7 +410,7 @@ public: SWAP(hashes[next_pos], hashes[pos]); SWAP(elements[next_pos], elements[pos]); pos = next_pos; - next_pos = fastmod((pos + 1), capacity_inv, capacity); + _increment_mod(next_pos, capacity); } hashes[pos] = EMPTY_HASH; elements[pos] = nullptr; diff --git a/core/templates/hash_set.h b/core/templates/hash_set.h index c97c001bd1..8df77e970d 100644 --- a/core/templates/hash_set.h +++ b/core/templates/hash_set.h @@ -72,9 +72,19 @@ private: return hash; } + _FORCE_INLINE_ static constexpr void _increment_mod(uint32_t &r_pos, const uint32_t p_capacity) { + r_pos++; + // `if` is faster than both fastmod and mod. + if (unlikely(r_pos == p_capacity)) { + r_pos = 0; + } + } + static _FORCE_INLINE_ uint32_t _get_probe_length(const uint32_t p_pos, const uint32_t p_hash, const uint32_t p_capacity, const uint64_t p_capacity_inv) { const uint32_t original_pos = fastmod(p_hash, p_capacity_inv, p_capacity); - return fastmod(p_pos - original_pos + p_capacity, p_capacity_inv, p_capacity); + const uint32_t distance_pos = p_pos - original_pos + p_capacity; + // At most p_capacity over 0, so we can use an if (faster than fastmod). + return distance_pos >= p_capacity ? 
distance_pos - p_capacity : distance_pos; } bool _lookup_pos(const TKey &p_key, uint32_t &r_pos) const { @@ -93,16 +103,16 @@ private: return false; } - if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { - return false; - } - if (hashes[pos] == hash && Comparator::compare(keys[hash_to_key[pos]], p_key)) { r_pos = hash_to_key[pos]; return true; } - pos = fastmod(pos + 1, capacity_inv, capacity); + if (distance > _get_probe_length(pos, hashes[pos], capacity, capacity_inv)) { + return false; + } + + _increment_mod(pos, capacity); distance++; } } @@ -132,7 +142,7 @@ private: distance = existing_probe_len; } - pos = fastmod(pos + 1, capacity_inv, capacity); + _increment_mod(pos, capacity); distance++; } } @@ -270,7 +280,7 @@ public: SWAP(hash_to_key[next_pos], hash_to_key[pos]); pos = next_pos; - next_pos = fastmod(pos + 1, capacity_inv, capacity); + _increment_mod(next_pos, capacity); } hashes[pos] = EMPTY_HASH; diff --git a/core/templates/local_vector.h b/core/templates/local_vector.h index 1871f24689..abf1363414 100644 --- a/core/templates/local_vector.h +++ b/core/templates/local_vector.h @@ -44,11 +44,37 @@ // Otherwise, it grows exponentially (the default and what you want in most cases). template class LocalVector { + static_assert(!force_trivial, "force_trivial is no longer supported. Use resize_uninitialized instead."); + private: U count = 0; U capacity = 0; T *data = nullptr; + template + void _resize(U p_size) { + if (p_size < count) { + if constexpr (!std::is_trivially_destructible_v) { + for (U i = p_size; i < count; i++) { + data[i].~T(); + } + } + count = p_size; + } else if (p_size > count) { + if (unlikely(p_size > capacity)) { + capacity = tight ? p_size : nearest_power_of_2_templated(p_size); + data = (T *)memrealloc(data, capacity * sizeof(T)); + CRASH_COND_MSG(!data, "Out of memory"); + } + if constexpr (p_init) { + memnew_arr_placement(data + count, p_size - count); + } else { + static_assert(std::is_trivially_destructible_v, "T must be trivially destructible to resize uninitialized"); + } + count = p_size; + } + } + public: _FORCE_INLINE_ T *ptr() { return data; } _FORCE_INLINE_ const T *ptr() const { return data; } @@ -156,26 +182,21 @@ public: } } + /// Resize the vector. + /// Elements are initialized (or not) depending on what the default C++ behavior for T is. + /// Note: If force_trivial is set, this will behave like resize_uninitialized instead. void resize(U p_size) { - // We must statically assert this in a function because otherwise, - // `LocalVector` cannot be used with a forward-declared type. - static_assert(!force_trivial || std::is_trivially_destructible_v, "T must be trivially destructible if force_trivial is set"); - - if (p_size < count) { - if constexpr (!std::is_trivially_destructible_v) { - for (U i = p_size; i < count; i++) { - data[i].~T(); - } - } - count = p_size; - } else if (p_size > count) { - reserve(p_size); - if constexpr (!std::is_trivially_constructible_v && !force_trivial) { - memnew_arr_placement(data + count, p_size - count); - } - count = p_size; - } + // Don't init when trivially constructible. + _resize>(p_size); } + + /// Resize and set all values to 0 / false / nullptr. + _FORCE_INLINE_ void resize_initialized(U p_size) { _resize(p_size); } + + /// Resize and set all values to 0 / false / nullptr. + /// This is only available for trivially destructible types (otherwise, trivial resize might be UB). 
+ _FORCE_INLINE_ void resize_uninitialized(U p_size) { _resize(p_size); } + _FORCE_INLINE_ const T &operator[](U p_index) const { CRASH_BAD_UNSIGNED_INDEX(p_index, count); return data[p_index]; @@ -390,8 +411,8 @@ public: } }; -template -using TightLocalVector = LocalVector; +template +using TightLocalVector = LocalVector; // Zero-constructing LocalVector initializes count, capacity and data to 0 and thus empty. template diff --git a/core/templates/pooled_list.h b/core/templates/pooled_list.h index e18f366d67..a15437a629 100644 --- a/core/templates/pooled_list.h +++ b/core/templates/pooled_list.h @@ -58,8 +58,8 @@ template class PooledList { - LocalVector list; - LocalVector freelist; + LocalVector list; + LocalVector freelist; // not all list members are necessarily used U _used_size; @@ -104,13 +104,17 @@ public: // pop from freelist int new_size = freelist.size() - 1; r_id = freelist[new_size]; - freelist.resize(new_size); + freelist.resize_uninitialized(new_size); return &list[r_id]; } r_id = list.size(); - list.resize(r_id + 1); + if constexpr (force_trivial || std::is_trivially_constructible_v) { + list.resize_uninitialized(r_id + 1); + } else { + list.resize_initialized(r_id + 1); + } static_assert((!zero_on_first_request) || (__is_pod(T)), "zero_on_first_request requires trivial type"); if constexpr (zero_on_first_request && __is_pod(T)) { @@ -171,7 +175,7 @@ public: // expand the active map (this should be in sync with the pool list if (_pool.used_size() > _active_map.size()) { - _active_map.resize(_pool.used_size()); + _active_map.resize_uninitialized(_pool.used_size()); } // store in the active map diff --git a/core/templates/vector.h b/core/templates/vector.h index 7381e43c04..22ecf2d2fb 100644 --- a/core/templates/vector.h +++ b/core/templates/vector.h @@ -95,14 +95,32 @@ public: _FORCE_INLINE_ operator Span() const { return _cowdata.span(); } _FORCE_INLINE_ Span span() const { return _cowdata.span(); } - _FORCE_INLINE_ void clear() { resize(0); } + _FORCE_INLINE_ void clear() { _cowdata.clear(); } _FORCE_INLINE_ bool is_empty() const { return _cowdata.is_empty(); } _FORCE_INLINE_ T get(Size p_index) { return _cowdata.get(p_index); } _FORCE_INLINE_ const T &get(Size p_index) const { return _cowdata.get(p_index); } _FORCE_INLINE_ void set(Size p_index, const T &p_elem) { _cowdata.set(p_index, p_elem); } - Error resize(Size p_size) { return _cowdata.resize(p_size); } - Error resize_zeroed(Size p_size) { return _cowdata.template resize(p_size); } + + /// Resize the vector. + /// Elements are initialized (or not) depending on what the default C++ behavior for this type is. + _FORCE_INLINE_ Error resize(Size p_size) { + return _cowdata.template resize>(p_size); + } + + /// Resize and set all values to 0 / false / nullptr. + /// This is only available for zero constructible types. + _FORCE_INLINE_ Error resize_initialized(Size p_size) { + return _cowdata.template resize(p_size); + } + + /// Resize and set all values to 0 / false / nullptr. + /// This is only available for trivially destructible types (otherwise, trivial resize might be UB). + _FORCE_INLINE_ Error resize_uninitialized(Size p_size) { + // resize() statically asserts that T is compatible, no need to do it ourselves. + return _cowdata.template resize(p_size); + } + _FORCE_INLINE_ const T &operator[](Size p_index) const { return _cowdata.get(p_index); } // Must take a copy instead of a reference (see GH-31736). 
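Since resize_zeroed call sites across the tree are renamed in this patch, here is a hedged usage sketch of how the three resize flavors declared above are meant to be picked. Container contents and sizes are made up; only the Vector/LocalVector methods shown in these hunks are assumed.

// Zero-fill is wanted: old resize_zeroed() callers map to resize_initialized().
Vector<uint8_t> hash_bytes;
hash_bytes.resize_initialized(16);

// Trivially destructible data that is fully overwritten right away: skip initialization.
Vector<float> samples;
samples.resize_uninitialized(1024);
for (int i = 0; i < samples.size(); i++) {
	samples.write[i] = 0.5f * i;
}

// Non-trivial element type: plain resize() keeps the default C++ construction behavior.
LocalVector<String> names;
names.resize(8);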
Error insert(Size p_pos, T p_val) { return _cowdata.insert(p_pos, p_val); } diff --git a/core/variant/array.cpp b/core/variant/array.cpp index 8ff75a6c07..6fe047e68f 100644 --- a/core/variant/array.cpp +++ b/core/variant/array.cpp @@ -39,7 +39,6 @@ #include "core/templates/vector.h" #include "core/variant/callable.h" #include "core/variant/dictionary.h" -#include "core/variant/variant.h" struct ArrayPrivate { SafeRefCount refcount; @@ -306,7 +305,7 @@ Error Array::resize(int p_new_size) { ERR_FAIL_COND_V_MSG(_p->read_only, ERR_LOCKED, "Array is in read-only state."); Variant::Type &variant_type = _p->typed.type; int old_size = _p->array.size(); - Error err = _p->array.resize_zeroed(p_new_size); + Error err = _p->array.resize_initialized(p_new_size); if (!err && variant_type != Variant::NIL && variant_type != Variant::OBJECT) { for (int i = old_size; i < p_new_size; i++) { VariantInternal::initialize(&_p->array.write[i], variant_type); @@ -520,10 +519,14 @@ const Variant &Array::get(int p_idx) const { } Array Array::duplicate(bool p_deep) const { - return recursive_duplicate(p_deep, 0); + return recursive_duplicate(p_deep, RESOURCE_DEEP_DUPLICATE_NONE, 0); } -Array Array::recursive_duplicate(bool p_deep, int recursion_count) const { +Array Array::duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode) const { + return recursive_duplicate(true, p_deep_subresources_mode, 0); +} + +Array Array::recursive_duplicate(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int recursion_count) const { Array new_arr; new_arr._p->typed = _p->typed; @@ -533,12 +536,19 @@ Array Array::recursive_duplicate(bool p_deep, int recursion_count) const { } if (p_deep) { + bool is_call_chain_end = recursion_count == 0; + recursion_count++; int element_count = size(); new_arr.resize(element_count); Variant *write = new_arr._p->array.ptrw(); for (int i = 0; i < element_count; i++) { - write[i] = get(i).recursive_duplicate(true, recursion_count); + write[i] = get(i).recursive_duplicate(true, p_deep_subresources_mode, recursion_count); + } + + // Variant::recursive_duplicate() may have created a remap cache by now. 
+ if (is_call_chain_end) { + Resource::_teardown_duplicate_from_variant(); } } else { new_arr._p->array = _p->array; diff --git a/core/variant/array.h b/core/variant/array.h index d3e8a85258..e1d3069081 100644 --- a/core/variant/array.h +++ b/core/variant/array.h @@ -33,6 +33,7 @@ #pragma once #include "core/typedefs.h" +#include "core/variant/variant_deep_duplicate.h" #include #include @@ -166,7 +167,8 @@ public: Variant pop_at(int p_pos); Array duplicate(bool p_deep = false) const; - Array recursive_duplicate(bool p_deep, int recursion_count) const; + Array duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode = RESOURCE_DEEP_DUPLICATE_INTERNAL) const; + Array recursive_duplicate(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int recursion_count) const; Array slice(int p_begin, int p_end = INT_MAX, int p_step = 1, bool p_deep = false) const; Array filter(const Callable &p_callable) const; diff --git a/core/variant/dictionary.cpp b/core/variant/dictionary.cpp index 4503cbfce6..3bfc29b8c3 100644 --- a/core/variant/dictionary.cpp +++ b/core/variant/dictionary.cpp @@ -571,7 +571,11 @@ const Variant *Dictionary::next(const Variant *p_key) const { } Dictionary Dictionary::duplicate(bool p_deep) const { - return recursive_duplicate(p_deep, 0); + return recursive_duplicate(p_deep, RESOURCE_DEEP_DUPLICATE_NONE, 0); +} + +Dictionary Dictionary::duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode) const { + return recursive_duplicate(true, p_deep_subresources_mode, 0); } void Dictionary::make_read_only() { @@ -583,7 +587,7 @@ bool Dictionary::is_read_only() const { return _p->read_only != nullptr; } -Dictionary Dictionary::recursive_duplicate(bool p_deep, int recursion_count) const { +Dictionary Dictionary::recursive_duplicate(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int recursion_count) const { Dictionary n; n._p->typed_key = _p->typed_key; n._p->typed_value = _p->typed_value; @@ -594,9 +598,16 @@ Dictionary Dictionary::recursive_duplicate(bool p_deep, int recursion_count) con } if (p_deep) { + bool is_call_chain_end = recursion_count == 0; + recursion_count++; for (const KeyValue &E : _p->variant_map) { - n[E.key.recursive_duplicate(true, recursion_count)] = E.value.recursive_duplicate(true, recursion_count); + n[E.key.recursive_duplicate(true, p_deep_subresources_mode, recursion_count)] = E.value.recursive_duplicate(true, p_deep_subresources_mode, recursion_count); + } + + // Variant::recursive_duplicate() may have created a remap cache by now. 
+ if (is_call_chain_end) { + Resource::_teardown_duplicate_from_variant(); } } else { for (const KeyValue &E : _p->variant_map) { @@ -645,6 +656,10 @@ bool Dictionary::is_typed_value() const { return _p->typed_value.type != Variant::NIL; } +bool Dictionary::is_same_instance(const Dictionary &p_other) const { + return _p == p_other._p; +} + bool Dictionary::is_same_typed(const Dictionary &p_other) const { return is_same_typed_key(p_other) && is_same_typed_value(p_other); } diff --git a/core/variant/dictionary.h b/core/variant/dictionary.h index 485b734287..52e0002be3 100644 --- a/core/variant/dictionary.h +++ b/core/variant/dictionary.h @@ -37,6 +37,7 @@ #include "core/templates/local_vector.h" #include "core/templates/pair.h" #include "core/variant/array.h" +#include "core/variant/variant_deep_duplicate.h" class Variant; @@ -100,7 +101,8 @@ public: Array values() const; Dictionary duplicate(bool p_deep = false) const; - Dictionary recursive_duplicate(bool p_deep, int recursion_count) const; + Dictionary duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode = RESOURCE_DEEP_DUPLICATE_INTERNAL) const; + Dictionary recursive_duplicate(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int recursion_count) const; void set_typed(const ContainerType &p_key_type, const ContainerType &p_value_type); void set_typed(uint32_t p_key_type, const StringName &p_key_class_name, const Variant &p_key_script, uint32_t p_value_type, const StringName &p_value_class_name, const Variant &p_value_script); @@ -108,6 +110,7 @@ public: bool is_typed() const; bool is_typed_key() const; bool is_typed_value() const; + bool is_same_instance(const Dictionary &p_other) const; bool is_same_typed(const Dictionary &p_other) const; bool is_same_typed_key(const Dictionary &p_other) const; bool is_same_typed_value(const Dictionary &p_other) const; diff --git a/core/variant/variant.h b/core/variant/variant.h index b7741078e2..ef97d6feb0 100644 --- a/core/variant/variant.h +++ b/core/variant/variant.h @@ -63,6 +63,7 @@ #include "core/variant/array.h" #include "core/variant/callable.h" #include "core/variant/dictionary.h" +#include "core/variant/variant_deep_duplicate.h" class Object; class RefCounted; @@ -614,7 +615,8 @@ public: void zero(); Variant duplicate(bool p_deep = false) const; - Variant recursive_duplicate(bool p_deep, int recursion_count) const; + Variant duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode = RESOURCE_DEEP_DUPLICATE_INTERNAL) const; + Variant recursive_duplicate(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int recursion_count) const; /* Built-In Methods */ diff --git a/core/variant/variant_call.cpp b/core/variant/variant_call.cpp index 60b137dc30..e4554c6469 100644 --- a/core/variant/variant_call.cpp +++ b/core/variant/variant_call.cpp @@ -2410,6 +2410,7 @@ static void _register_variant_builtin_methods_misc() { bind_method(Dictionary, keys, sarray(), varray()); bind_method(Dictionary, values, sarray(), varray()); bind_method(Dictionary, duplicate, sarray("deep"), varray(false)); + bind_method(Dictionary, duplicate_deep, sarray("deep_subresources_mode"), varray(RESOURCE_DEEP_DUPLICATE_INTERNAL)); bind_method(Dictionary, get, sarray("key", "default"), varray(Variant())); bind_method(Dictionary, get_or_add, sarray("key", "default"), varray(Variant())); bind_method(Dictionary, set, sarray("key", "value"), varray()); @@ -2468,6 +2469,7 @@ static void _register_variant_builtin_methods_array() { bind_method(Array, bsearch_custom, sarray("value", 
"func", "before"), varray(true)); bind_method(Array, reverse, sarray(), varray()); bind_method(Array, duplicate, sarray("deep"), varray(false)); + bind_method(Array, duplicate_deep, sarray("deep_subresources_mode"), varray(RESOURCE_DEEP_DUPLICATE_INTERNAL)); bind_method(Array, slice, sarray("begin", "end", "step", "deep"), varray(INT_MAX, 1, false)); bind_method(Array, filter, sarray("method"), varray()); bind_method(Array, map, sarray("method"), varray()); @@ -2516,7 +2518,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedByteArray, remove_at, sarray("index"), varray()); bind_method(PackedByteArray, insert, sarray("at_index", "value"), varray()); bind_method(PackedByteArray, fill, sarray("value"), varray()); - bind_methodv(PackedByteArray, resize, &PackedByteArray::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedByteArray, resize, &PackedByteArray::resize_initialized, sarray("new_size"), varray()); bind_method(PackedByteArray, clear, sarray(), varray()); bind_method(PackedByteArray, has, sarray("value"), varray()); bind_method(PackedByteArray, reverse, sarray(), varray()); @@ -2587,7 +2589,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedInt32Array, remove_at, sarray("index"), varray()); bind_method(PackedInt32Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedInt32Array, fill, sarray("value"), varray()); - bind_methodv(PackedInt32Array, resize, &PackedInt32Array::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedInt32Array, resize, &PackedInt32Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedInt32Array, clear, sarray(), varray()); bind_method(PackedInt32Array, has, sarray("value"), varray()); bind_method(PackedInt32Array, reverse, sarray(), varray()); @@ -2611,7 +2613,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedInt64Array, remove_at, sarray("index"), varray()); bind_method(PackedInt64Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedInt64Array, fill, sarray("value"), varray()); - bind_methodv(PackedInt64Array, resize, &PackedInt64Array::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedInt64Array, resize, &PackedInt64Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedInt64Array, clear, sarray(), varray()); bind_method(PackedInt64Array, has, sarray("value"), varray()); bind_method(PackedInt64Array, reverse, sarray(), varray()); @@ -2635,7 +2637,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedFloat32Array, remove_at, sarray("index"), varray()); bind_method(PackedFloat32Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedFloat32Array, fill, sarray("value"), varray()); - bind_methodv(PackedFloat32Array, resize, &PackedFloat32Array::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedFloat32Array, resize, &PackedFloat32Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedFloat32Array, clear, sarray(), varray()); bind_method(PackedFloat32Array, has, sarray("value"), varray()); bind_method(PackedFloat32Array, reverse, sarray(), varray()); @@ -2659,7 +2661,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedFloat64Array, remove_at, sarray("index"), varray()); bind_method(PackedFloat64Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedFloat64Array, fill, sarray("value"), varray()); - bind_methodv(PackedFloat64Array, 
resize, &PackedFloat64Array::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedFloat64Array, resize, &PackedFloat64Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedFloat64Array, clear, sarray(), varray()); bind_method(PackedFloat64Array, has, sarray("value"), varray()); bind_method(PackedFloat64Array, reverse, sarray(), varray()); @@ -2683,7 +2685,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedStringArray, remove_at, sarray("index"), varray()); bind_method(PackedStringArray, insert, sarray("at_index", "value"), varray()); bind_method(PackedStringArray, fill, sarray("value"), varray()); - bind_methodv(PackedStringArray, resize, &PackedStringArray::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedStringArray, resize, &PackedStringArray::resize_initialized, sarray("new_size"), varray()); bind_method(PackedStringArray, clear, sarray(), varray()); bind_method(PackedStringArray, has, sarray("value"), varray()); bind_method(PackedStringArray, reverse, sarray(), varray()); @@ -2707,7 +2709,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedVector2Array, remove_at, sarray("index"), varray()); bind_method(PackedVector2Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedVector2Array, fill, sarray("value"), varray()); - bind_methodv(PackedVector2Array, resize, &PackedVector2Array::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedVector2Array, resize, &PackedVector2Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedVector2Array, clear, sarray(), varray()); bind_method(PackedVector2Array, has, sarray("value"), varray()); bind_method(PackedVector2Array, reverse, sarray(), varray()); @@ -2731,7 +2733,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedVector3Array, remove_at, sarray("index"), varray()); bind_method(PackedVector3Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedVector3Array, fill, sarray("value"), varray()); - bind_methodv(PackedVector3Array, resize, &PackedVector3Array::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedVector3Array, resize, &PackedVector3Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedVector3Array, clear, sarray(), varray()); bind_method(PackedVector3Array, has, sarray("value"), varray()); bind_method(PackedVector3Array, reverse, sarray(), varray()); @@ -2755,7 +2757,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedColorArray, remove_at, sarray("index"), varray()); bind_method(PackedColorArray, insert, sarray("at_index", "value"), varray()); bind_method(PackedColorArray, fill, sarray("value"), varray()); - bind_methodv(PackedColorArray, resize, &PackedColorArray::resize_zeroed, sarray("new_size"), varray()); + bind_methodv(PackedColorArray, resize, &PackedColorArray::resize_initialized, sarray("new_size"), varray()); bind_method(PackedColorArray, clear, sarray(), varray()); bind_method(PackedColorArray, has, sarray("value"), varray()); bind_method(PackedColorArray, reverse, sarray(), varray()); @@ -2779,7 +2781,7 @@ static void _register_variant_builtin_methods_array() { bind_method(PackedVector4Array, remove_at, sarray("index"), varray()); bind_method(PackedVector4Array, insert, sarray("at_index", "value"), varray()); bind_method(PackedVector4Array, fill, sarray("value"), varray()); - bind_methodv(PackedVector4Array, resize, &PackedVector4Array::resize_zeroed, 
sarray("new_size"), varray()); + bind_methodv(PackedVector4Array, resize, &PackedVector4Array::resize_initialized, sarray("new_size"), varray()); bind_method(PackedVector4Array, clear, sarray(), varray()); bind_method(PackedVector4Array, has, sarray("value"), varray()); bind_method(PackedVector4Array, reverse, sarray(), varray()); diff --git a/core/variant/variant_deep_duplicate.h b/core/variant/variant_deep_duplicate.h new file mode 100644 index 0000000000..7e013a84d8 --- /dev/null +++ b/core/variant/variant_deep_duplicate.h @@ -0,0 +1,43 @@ +/****************************************************************************/ +/* variant_deep_duplicate.h */ +/****************************************************************************/ +/* This file is part of: */ +/* REDOT ENGINE */ +/* https://redotengine.org */ +/****************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/****************************************************************************/ + +#pragma once + +// This would be ideally declared nested in Variant, but that would cause circular +// includes with Array and Dictionary, for instance. +// Also, this enum is to be exposed via Resource.
+enum ResourceDeepDuplicateMode { + RESOURCE_DEEP_DUPLICATE_NONE, + RESOURCE_DEEP_DUPLICATE_INTERNAL, + RESOURCE_DEEP_DUPLICATE_ALL, + RESOURCE_DEEP_DUPLICATE_MAX +}; diff --git a/core/variant/variant_setget.cpp b/core/variant/variant_setget.cpp index 9d0901c020..b39ac7defa 100644 --- a/core/variant/variant_setget.cpp +++ b/core/variant/variant_setget.cpp @@ -31,9 +31,10 @@ /**************************************************************************/ #include "variant_setget.h" - #include "variant_callable.h" +#include "core/io/resource.h" + struct VariantSetterGetterInfo { void (*setter)(Variant *base, const Variant *value, bool &valid); void (*getter)(const Variant *base, Variant *value); @@ -1971,26 +1972,33 @@ Variant Variant::iter_get(const Variant &r_iter, bool &r_valid) const { } Variant Variant::duplicate(bool p_deep) const { - return recursive_duplicate(p_deep, 0); + return recursive_duplicate(p_deep, RESOURCE_DEEP_DUPLICATE_NONE, 0); } -Variant Variant::recursive_duplicate(bool p_deep, int recursion_count) const { +Variant Variant::duplicate_deep(ResourceDeepDuplicateMode p_deep_subresources_mode) const { + ERR_FAIL_INDEX_V(p_deep_subresources_mode, RESOURCE_DEEP_DUPLICATE_MAX, Variant()); + return recursive_duplicate(true, p_deep_subresources_mode, 0); +} + +Variant Variant::recursive_duplicate(bool p_deep, ResourceDeepDuplicateMode p_deep_subresources_mode, int recursion_count) const { switch (type) { case OBJECT: { - /* breaks stuff :( - if (p_deep && !_get_obj().ref.is_null()) { - Ref resource = _get_obj().ref; - if (resource.is_valid()) { - return resource->duplicate(true); - } + // If the root target of duplicate() is a Resource, we can't early-reject because that + // resource itself must be duplicated, much as if Resource::duplicate() had been called. + if (p_deep_subresources_mode == RESOURCE_DEEP_DUPLICATE_NONE && recursion_count > 0) { + return *this; + } + Resource *res = Object::cast_to(_get_obj().obj); + if (res) { + return res->_duplicate_from_variant(p_deep, p_deep_subresources_mode, recursion_count); + } else { + return *this; } - */ - return *this; } break; case DICTIONARY: - return operator Dictionary().recursive_duplicate(p_deep, recursion_count); + return operator Dictionary().recursive_duplicate(p_deep, p_deep_subresources_mode, recursion_count); case ARRAY: - return operator Array().recursive_duplicate(p_deep, recursion_count); + return operator Array().recursive_duplicate(p_deep, p_deep_subresources_mode, recursion_count); case PACKED_BYTE_ARRAY: return operator Vector().duplicate(); case PACKED_INT32_ARRAY: diff --git a/doc/classes/@GlobalScope.xml b/doc/classes/@GlobalScope.xml index d99e243036..110396eb32 100644 --- a/doc/classes/@GlobalScope.xml +++ b/doc/classes/@GlobalScope.xml @@ -3036,7 +3036,8 @@ Hints that a property will be changed on its own after setting, such as [member AudioStreamPlayer.playing] or [member GPUParticles3D.emitting]. - Hints that a boolean property will enable the feature associated with the group that it occurs in. Only works within a group or subgroup. + Hints that a boolean property will enable the feature associated with the group that it occurs in. Only works within a group or subgroup. Use the optional hint string [code]"feature"[/code] when the group only has variables that are meaningful when the feature is enabled. + [b]Note:[/b] The [code]"feature"[/code] hint string does not modify or reset any values. Hints that a [String] or [StringName] property is the name of an input action. 
This allows the selection of any action name from the Input Map in the Project Settings. The hint string may contain two options separated by commas: diff --git a/doc/classes/AcceptDialog.xml b/doc/classes/AcceptDialog.xml index ccbe2ddd55..a5b589842a 100644 --- a/doc/classes/AcceptDialog.xml +++ b/doc/classes/AcceptDialog.xml @@ -62,7 +62,7 @@ Sets autowrapping for the text in the dialog. - If [code]true[/code], the dialog will be hidden when the escape key ([constant KEY_ESCAPE]) is pressed. + If [code]true[/code], the dialog will be hidden when the [code]ui_cancel[/code] action is pressed (by default, this action is bound to [constant KEY_ESCAPE]). If [code]true[/code], the dialog is hidden when the OK button is pressed. You can set it to [code]false[/code] if you want to do e.g. input validation when receiving the [signal confirmed] signal, and handle hiding the dialog in your own logic. diff --git a/doc/classes/Array.xml b/doc/classes/Array.xml index 3dbf34f5a5..35415b3b3a 100644 --- a/doc/classes/Array.xml +++ b/doc/classes/Array.xml @@ -332,7 +332,16 @@ Returns a new copy of the array. - By default, a [b]shallow[/b] copy is returned: all nested [Array] and [Dictionary] elements are shared with the original array. Modifying them in one array will also affect them in the other.[br]If [param deep] is [code]true[/code], a [b]deep[/b] copy is returned: all nested arrays and dictionaries are also duplicated (recursively). + By default, a [b]shallow[/b] copy is returned: all nested [Array], [Dictionary], and [Resource] elements are shared with the original array. Modifying any of those in one array will also affect them in the other. + If [param deep] is [code]true[/code], a [b]deep[/b] copy is returned: all nested arrays and dictionaries are also duplicated (recursively). Any [Resource] is still shared with the original array, though. + + + + + + + Duplicates this array, deeply, like [method duplicate][code](true)[/code], with extra control over how subresources are handled. + [param deep_subresources_mode] must be one of the values from [enum Resource.ResourceDeepDuplicateMode]. By default, only internal resources will be duplicated (recursively). diff --git a/doc/classes/CapsuleShape2D.xml b/doc/classes/CapsuleShape2D.xml index 52ea7af292..c8652e4ec4 100644 --- a/doc/classes/CapsuleShape2D.xml +++ b/doc/classes/CapsuleShape2D.xml @@ -11,7 +11,10 @@ - The capsule's height. + The capsule's full height, including the semicircles. + + + The capsule's height, excluding the semicircles. This is the height of the central rectangular part in the middle of the capsule, and is the distance between the centers of the two semicircles. This is a wrapper for [member height]. The capsule's radius. diff --git a/doc/classes/CapsuleShape3D.xml b/doc/classes/CapsuleShape3D.xml index 4c6b3a870f..4e7c3d2edb 100644 --- a/doc/classes/CapsuleShape3D.xml +++ b/doc/classes/CapsuleShape3D.xml @@ -12,7 +12,10 @@ - The capsule's height. + The capsule's full height, including the hemispheres. + + + The capsule's height, excluding the hemispheres. This is the height of the central cylindrical part in the middle of the capsule, and is the distance between the centers of the two hemispheres. This is a wrapper for [member height]. The capsule's radius. diff --git a/doc/classes/ColorPicker.xml b/doc/classes/ColorPicker.xml index 0edd38df73..e96671be2a 100644 --- a/doc/classes/ColorPicker.xml +++ b/doc/classes/ColorPicker.xml @@ -76,6 +76,9 @@ If [code]true[/code], shows an alpha channel slider (opacity). 
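As a concrete illustration of the duplicate() versus duplicate_deep() split documented above for Array (Dictionary behaves the same way), a hedged C++-level sketch; the subresource variable is a placeholder and only the Array API added in this diff is assumed.

Ref<Resource> sub; // Placeholder subresource stored in the array.
sub.instantiate();

Array source;
source.push_back(sub);
source.push_back(Array()); // A nested container.

Array shallow = source.duplicate();            // Nested containers and resources are shared.
Array deep = source.duplicate(true);           // Containers are copied recursively; resources stay shared.
Array deep_internal = source.duplicate_deep(); // Default RESOURCE_DEEP_DUPLICATE_INTERNAL:
                                               // containers copied and built-in resources duplicated too.
Array deep_all = source.duplicate_deep(RESOURCE_DEEP_DUPLICATE_ALL); // Every subresource is duplicated.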
+ + If [code]true[/code], shows an intensity slider. The intensity is applied as follows: multiply the color by [code]2 ** intensity[/code] in linear RGB space, and then convert it back to sRGB. + If [code]true[/code], the hex color code input field is visible. @@ -117,13 +120,15 @@ - Allows editing the color with Red/Green/Blue sliders. + Allows editing the color with Red/Green/Blue sliders in sRGB color space. Allows editing the color with Hue/Saturation/Value sliders. - - Allows the color R, G, B component values to go beyond 1.0, which can be used for certain special operations that require it (like tinting without darkening or rendering sprites in HDR). + + + + Allows editing the color with Red/Green/Blue sliders in linear color space. Allows editing the color with Hue/Saturation/Lightness sliders. @@ -177,6 +182,9 @@ Custom texture for the hue selection slider on the right. + + The icon for the button that switches color text to hexadecimal. + The icon for color preset drop down menu when expanded. diff --git a/doc/classes/ColorPickerButton.xml b/doc/classes/ColorPickerButton.xml index bec2520397..0852929fe6 100644 --- a/doc/classes/ColorPickerButton.xml +++ b/doc/classes/ColorPickerButton.xml @@ -35,6 +35,9 @@ If [code]true[/code], the alpha channel in the displayed [ColorPicker] will be visible. + + If [code]true[/code], the intensity slider in the displayed [ColorPicker] will be visible. + diff --git a/doc/classes/Dictionary.xml b/doc/classes/Dictionary.xml index 89fc46f931..d68b0856a6 100644 --- a/doc/classes/Dictionary.xml +++ b/doc/classes/Dictionary.xml @@ -187,7 +187,17 @@ - Creates and returns a new copy of the dictionary. If [param deep] is [code]true[/code], inner [Dictionary] and [Array] keys and values are also copied, recursively. + Returns a new copy of the dictionary. + By default, a [b]shallow[/b] copy is returned: all nested [Array], [Dictionary], and [Resource] keys and values are shared with the original dictionary. Modifying any of those in one dictionary will also affect them in the other. + If [param deep] is [code]true[/code], a [b]deep[/b] copy is returned: all nested arrays and dictionaries are also duplicated (recursively). Any [Resource] is still shared with the original dictionary, though. + + + + + + + Duplicates this dictionary, deeply, like [method duplicate][code](true)[/code], with extra control over how subresources are handled. + [param deep_subresources_mode] must be one of the values from [enum Resource.ResourceDeepDuplicateMode]. By default, only internal resources will be duplicated (recursively). diff --git a/doc/classes/DirAccess.xml b/doc/classes/DirAccess.xml index beb57aef2c..1afc7960d8 100644 --- a/doc/classes/DirAccess.xml +++ b/doc/classes/DirAccess.xml @@ -213,6 +213,13 @@ [b]Note:[/b] When used on a [code]res://[/code] path in an exported project, only the files included in the PCK at the given folder level are returned. In practice, this means that since imported resources are stored in a top-level [code].godot/[/code] folder, only paths to [code].gd[/code] and [code].import[/code] files are returned (plus a few other files, such as [code]project.godot[/code] or [code]project.binary[/code] and the project icon). In an exported project, the list of returned files will also vary depending on [member ProjectSettings.editor/export/convert_text_resources_to_binary]. + + + + Returns file system type name of the current directory's disk. 
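The intensity behavior described for [ColorPicker] above amounts to an exposure-style scale in linear space; a hedged sketch of the math follows (this is not the picker's actual implementation, just the documented formula).

// Multiply the color by 2 ** intensity in linear RGB, then convert back to sRGB.
Color apply_intensity(const Color &p_srgb, float p_intensity) {
	Color linear = p_srgb.srgb_to_linear();
	const float scale = Math::pow(2.0f, p_intensity);
	linear.r *= scale;
	linear.g *= scale;
	linear.b *= scale;
	return linear.linear_to_srgb();
}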
Returned values are uppercase strings like [code]NTFS[/code], [code]FAT32[/code], [code]EXFAT[/code], [code]APFS[/code], [code]EXT4[/code], [code]BTRFS[/code], and so on. + [b]Note:[/b] This method is implemented on macOS, Linux, Windows and for PCK virtual file system. + + diff --git a/doc/classes/EditorContextMenuPlugin.xml b/doc/classes/EditorContextMenuPlugin.xml index 62f2845f19..a476ac4754 100644 --- a/doc/classes/EditorContextMenuPlugin.xml +++ b/doc/classes/EditorContextMenuPlugin.xml @@ -89,7 +89,7 @@ Context menu of Script editor's script tabs. [method _popup_menu] will be called with the path to the currently edited script, while option callback will receive reference to that script. - The "Create..." submenu of FileSystem dock's context menu. [method _popup_menu] and option callback will be called with list of paths of the currently selected files. + The "Create..." submenu of FileSystem dock's context menu, or the "New" section of the main context menu when empty space is clicked. [method _popup_menu] and option callback will be called with list of paths of the currently selected files. When clicking the empty space, the list of paths for popup method will be empty. Context menu of Script editor's code editor. [method _popup_menu] will be called with the path to the [CodeEdit] node. You can fetch it using this code: diff --git a/doc/classes/EditorProperty.xml b/doc/classes/EditorProperty.xml index 7f011236a9..19f73c1a1d 100644 --- a/doc/classes/EditorProperty.xml +++ b/doc/classes/EditorProperty.xml @@ -48,13 +48,15 @@ - Gets the edited object. + Returns the edited object. + [b]Note:[/b] This method could return [code]null[/code] if the editor has not yet been associated with a property. However, in [method _update_property] and [method _set_read_only], this value is [i]guaranteed[/i] to be non-[code]null[/code]. - Gets the edited property. If your editor is for a single property (added via [method EditorInspectorPlugin._parse_property]), then this will return the property. + Returns the edited property. If your editor is for a single property (added via [method EditorInspectorPlugin._parse_property]), then this will return the property. + [b]Note:[/b] This method could return [code]null[/code] if the editor has not yet been associated with a property. However, in [method _update_property] and [method _set_read_only], this value is [i]guaranteed[/i] to be non-[code]null[/code]. @@ -95,7 +97,7 @@ - Forces refresh of the property display. + Forces a refresh of the property display. diff --git a/doc/classes/EditorSettings.xml b/doc/classes/EditorSettings.xml index 6c11cce7dc..b4ebe21c81 100644 --- a/doc/classes/EditorSettings.xml +++ b/doc/classes/EditorSettings.xml @@ -973,6 +973,9 @@ If [code]true[/code], automatically expands property groups in the Inspector dock when opening a scene that hasn't been opened previously. If [code]false[/code], all groups remain collapsed by default. + + If [code]true[/code], show the intensity slider in the [ColorPicker]s opened in the editor. + The default color picker mode to use when opening [ColorPicker]s in the editor. This mode can be temporarily adjusted on the color picker itself. diff --git a/doc/classes/Engine.xml b/doc/classes/Engine.xml index 39db8dec9c..fa79b23a32 100644 --- a/doc/classes/Engine.xml +++ b/doc/classes/Engine.xml @@ -22,7 +22,7 @@ - Returns the name of the CPU architecture the Redot binary was built for. 
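A brief hedged sketch of how the DirAccess queries exposed above might be used from C++; the path is a placeholder.

Ref<DirAccess> dir = DirAccess::open("res://");
if (dir.is_valid()) {
	String fs_type = dir->get_filesystem_type();            // e.g. "NTFS", "EXT4", "APFS".
	bool case_sensitive = dir->is_case_sensitive("res://"); // Per-path case sensitivity query.
	print_line(vformat("Filesystem: %s, case-sensitive: %s", fs_type, case_sensitive));
}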
Possible return values include [code]"x86_64"[/code], [code]"x86_32"[/code], [code]"arm64"[/code], [code]"arm32"[/code], [code]"rv64"[/code], [code]"riscv"[/code], [code]"ppc64"[/code], [code]"ppc"[/code], [code]"wasm64"[/code], and [code]"wasm32"[/code]. + Returns the name of the CPU architecture the Redot binary was built for. Possible return values include [code]"x86_64"[/code], [code]"x86_32"[/code], [code]"arm64"[/code], [code]"arm32"[/code], [code]"rv64"[/code], [code]"ppc64"[/code], [code]"loongarch64"[/code], [code]"wasm64"[/code], and [code]"wasm32"[/code]. To detect whether the current build is 64-bit, or the type of architecture, don't use the architecture name. Instead, use [method OS.has_feature] to check for the [code]"64"[/code] feature tag, or tags such as [code]"x86"[/code] or [code]"arm"[/code]. See the [url=$DOCS_URL/tutorials/export/feature_tags.html]Feature Tags[/url] documentation for more details. [b]Note:[/b] This method does [i]not[/i] return the name of the system's CPU architecture (like [method OS.get_processor_name]). For example, when running an [code]x86_32[/code] Redot binary on an [code]x86_64[/code] system, the returned value will still be [code]"x86_32"[/code]. diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml index 1f9e22e1bb..09b7a425e5 100644 --- a/doc/classes/Node.xml +++ b/doc/classes/Node.xml @@ -506,6 +506,13 @@ Fetches a node by [NodePath]. Similar to [method get_node], but does not generate an error if [param path] does not point to a valid node. + + + + Returns a [Dictionary] mapping method names to their RPC configuration defined for this node using [method rpc_config]. + [b]Note:[/b] This method only returns the RPC configuration assigned via [method rpc_config]. See [method Script.get_rpc_config] to retrieve the RPCs defined by the [Script]. + + @@ -549,12 +556,6 @@ [b]Note:[/b] The returned value will be larger than expected if running at a framerate lower than [member Engine.physics_ticks_per_second] / [member Engine.max_physics_steps_per_frame] FPS. This is done to avoid "spiral of death" scenarios where performance would plummet due to an ever-increasing number of physics steps per frame. This behavior affects both [method _process] and [method _physics_process]. As a result, avoid using [code]delta[/code] for time measurements in real-world seconds. Use the [Time] singleton's methods for this purpose instead, such as [method Time.get_ticks_usec]. - - - - Returns a [Dictionary] mapping method names to their RPC configuration defined for this node using [method rpc_config]. - - diff --git a/doc/classes/Object.xml b/doc/classes/Object.xml index 9a48764373..5d94d6a7ed 100644 --- a/doc/classes/Object.xml +++ b/doc/classes/Object.xml @@ -1039,5 +1039,8 @@ Reference-counted connections can be assigned to the same [Callable] multiple times. Each disconnection decreases the internal counter. The signal fully disconnects only when the counter reaches 0. + + The source object is automatically bound when a [PackedScene] is instantiated. If this flag bit is enabled, the source object will be appended right after the original arguments of the signal. + diff --git a/doc/classes/PhysicsServer3D.xml b/doc/classes/PhysicsServer3D.xml index f3f8ea9403..af066bde3b 100644 --- a/doc/classes/PhysicsServer3D.xml +++ b/doc/classes/PhysicsServer3D.xml @@ -1058,6 +1058,42 @@ Adds the given body to the list of bodies exempt from collisions. + + + + + + Distributes and applies a force to all points. 
A force is time dependent and meant to be applied every physics update. + + + + + + + + Distributes and applies an impulse to all points. + An impulse is time-independent! Applying an impulse every frame would result in a framerate-dependent force. For this reason, it should only be used when simulating one-time impacts (use the "_force" functions otherwise). + + + + + + + + + Applies a force to a point. A force is time dependent and meant to be applied every physics update. + + + + + + + + + Applies an impulse to a point. + An impulse is time-independent! Applying an impulse every frame would result in a framerate-dependent force. For this reason, it should only be used when simulating one-time impacts (use the "_force" functions otherwise). + + @@ -1121,6 +1157,13 @@ Returns the pressure coefficient of the given soft body. + + + + + Returns the shrinking factor of the given soft body. + + @@ -1258,6 +1301,14 @@ Sets whether the given soft body will be pickable when using object picking. + + + + + + Sets the shrinking factor of the given soft body. + + diff --git a/doc/classes/PhysicsServer3DExtension.xml b/doc/classes/PhysicsServer3DExtension.xml index e58a7ff9a8..fdc110d0be 100644 --- a/doc/classes/PhysicsServer3DExtension.xml +++ b/doc/classes/PhysicsServer3DExtension.xml @@ -989,6 +989,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -1049,6 +1079,12 @@ + + + + + + @@ -1166,6 +1202,13 @@ + + + + + + + diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 978390a5e7..92b8df5790 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -2482,6 +2482,12 @@ [b]Dummy[/b] is a 3D physics server that does nothing and returns only dummy values, effectively disabling all 3D physics functionality. Third-party extensions and modules can add other physics engines to select with this setting. + + The approach used for 3D scene traversal when physics interpolation is enabled. + - [code]DEFAULT[/code]: The default optimized method. + - [code]Legacy[/code]: The previous reference method used for scene tree traversal, which is slower. + - [code]Debug[/code]: Swaps between [code]DEFAULT[/code] and [code]Legacy[/code] methods on alternating frames, and provides logging information (which in turn makes it slower). Intended for debugging only; you should use the [code]DEFAULT[/code] method in most cases. + If [code]true[/code], the 3D physics server runs on a separate thread, making better use of multi-core CPUs. If [code]false[/code], the 3D physics server runs on the main thread. Running the physics server on a separate thread can increase performance, but restricts API access to only physics process. [b]Note:[/b] When [member physics/3d/physics_engine] is set to [code]Jolt Physics[/code], enabling this setting will prevent the 3D physics server from being able to provide any context when reporting errors and warnings, and will instead always refer to nodes as [code]<unknown>[/code]. @@ -2706,6 +2712,10 @@ [b]Note:[/b] Screen-space antialiasing is only supported in the Forward+ and Mobile rendering methods, not Compatibility. [b]Note:[/b] This property is only read when the project starts. To set the screen-space antialiasing mode at runtime, set [member Viewport.screen_space_aa] on the root [Viewport] instead, or use [method RenderingServer.viewport_set_screen_space_aa]. + + Sets the sensitivity to edges when using SMAA for antialiasing. Lower values will catch more edges, at a potentially higher performance cost. 
+ [b]Note:[/b] This property is only read when the project starts. There is currently no way to change this setting at run-time. + If [code]true[/code], uses a fast post-processing filter to make banding significantly less visible in 3D. 2D rendering is [i]not[/i] affected by debanding unless the [member Environment.background_mode] is [constant Environment.BG_CANVAS]. In some cases, debanding may introduce a slightly noticeable dithering pattern. It's recommended to enable debanding only when actually needed since the dithering pattern will make lossless-compressed screenshots larger. diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml index 5f25467822..0d725a1a9b 100644 --- a/doc/classes/RenderingServer.xml +++ b/doc/classes/RenderingServer.xml @@ -5141,7 +5141,10 @@ Use fast approximate antialiasing. FXAA is a popular screen-space antialiasing method, which is fast but will make the image look blurry, especially at lower resolutions. It can still work relatively well at large resolutions such as 1440p and 4K. - + + Use subpixel morphological antialiasing. SMAA may produce clearer results than FXAA, but at a slightly higher performance cost. + + Represents the size of the [enum ViewportScreenSpaceAA] enum. diff --git a/doc/classes/Resource.xml b/doc/classes/Resource.xml index 81596e2cf2..18a08ea0e0 100644 --- a/doc/classes/Resource.xml +++ b/doc/classes/Resource.xml @@ -50,15 +50,24 @@ - + Duplicates this resource, returning a new resource with its [code]export[/code]ed or [constant PROPERTY_USAGE_STORAGE] properties copied from the original. - If [param subresources] is [code]false[/code], a shallow copy is returned; nested resources within subresources are not duplicated and are shared with the original resource (with one exception; see below). If [param subresources] is [code]true[/code], a deep copy is returned; nested subresources will be duplicated and are not shared (with two exceptions; see below). - [param subresources] is usually respected, with the following exceptions: - - Subresource properties with the [constant PROPERTY_USAGE_ALWAYS_DUPLICATE] flag are always duplicated. + If [param deep] is [code]false[/code], a [b]shallow[/b] copy is returned: nested [Array], [Dictionary], and [Resource] properties are not duplicated and are shared with the original resource. + If [param deep] is [code]true[/code], a [b]deep[/b] copy is returned: all nested arrays, dictionaries, and packed arrays are also duplicated (recursively). Any [Resource] found inside will only be duplicated if it's local, like [constant RESOURCE_DEEP_DUPLICATE_INTERNAL] used with [method duplicate_deep]. + The following exceptions apply: + - Subresource properties with the [constant PROPERTY_USAGE_ALWAYS_DUPLICATE] flag are always duplicated (recursively or not, depending on [param deep]). - Subresource properties with the [constant PROPERTY_USAGE_NEVER_DUPLICATE] flag are never duplicated. - - Subresources inside [Array] and [Dictionary] properties are never duplicated. [b]Note:[/b] For custom resources, this method will fail if [method Object._init] has been defined with required parameters. + [b]Note:[/b] When duplicating with [param deep] set to [code]true[/code], each resource found, including the one on which this method is called, will be only duplicated once and referenced as many times as needed in the duplicate. For instance, if you are duplicating resource A that happens to have resource B referenced twice, you'll get a new resource A' referencing a new resource B' twice. 
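To make the sharing rules above concrete, a hedged C++-level sketch of the documented behavior; "a" and "b" are placeholder resources, and the properties through which a references b twice are not shown.

Ref<Resource> b;
b.instantiate(); // Built-in: no resource path.
Ref<Resource> a;
a.instantiate(); // Assume a references b through two of its stored properties.

Ref<Resource> shallow = a->duplicate();  // Copy of a; both references still point at the original b.
Ref<Resource> deep = a->duplicate(true); // b is local, so it is duplicated exactly once:
                                         // both references in the copy point at the same new b'.
Ref<Resource> full = a->duplicate_deep(RESOURCE_DEEP_DUPLICATE_ALL); // Subresources with a saved path are copied as well.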
+ + + + + + + Duplicates this resource, deeply, like [method duplicate][code](true)[/code], with extra control over how subresources are handled. + [param deep_subresources_mode] must be one of the values from [enum ResourceDeepDuplicateMode]. @@ -176,4 +185,15 @@ + + + No subresorces at all are duplicated. This is useful even in a deep duplication to have all the arrays and dictionaries duplicated but still pointing to the original resources. + + + Only subresources without a path or with a scene-local path will be duplicated. + + + Every subresource found will be duplicated, even if it has a non-local path. In other words, even potentially big resources stored separately will be duplicated. + + diff --git a/doc/classes/ResourceUID.xml b/doc/classes/ResourceUID.xml index a87fb32898..250034225f 100644 --- a/doc/classes/ResourceUID.xml +++ b/doc/classes/ResourceUID.xml @@ -33,6 +33,13 @@ Like [method create_id], but the UID is seeded with the provided [param path] and project name. UIDs generated for that path will be always the same within the current project. + + + + + Returns a path, converting [param path_or_uid] if necessary. Prints an error if provided an invalid UID. + + @@ -55,6 +62,13 @@ Converts the given UID to a [code]uid://[/code] string value. + + + + + Converts the provided resource [param path] to a UID. Returns the unchanged path if it has no associated UID. + + @@ -79,6 +93,13 @@ Extracts the UID value from the given [code]uid://[/code] string. + + + + + Converts the provided [param uid] to a path. Prints an error if the UID is invalid. + + diff --git a/doc/classes/SoftBody3D.xml b/doc/classes/SoftBody3D.xml index f1046b4d8a..d91204657b 100644 --- a/doc/classes/SoftBody3D.xml +++ b/doc/classes/SoftBody3D.xml @@ -19,6 +19,38 @@ Adds a body to the list of bodies that this body can't collide with. + + + + + Distributes and applies a force to all points. A force is time dependent and meant to be applied every physics update. + + + + + + + Distributes and applies an impulse to all points. + An impulse is time-independent! Applying an impulse every frame would result in a framerate-dependent force. For this reason, it should only be used when simulating one-time impacts (use the "_force" functions otherwise). + + + + + + + + Applies a force to a point. A force is time dependent and meant to be applied every physics update. + + + + + + + + Applies an impulse to a point. + An impulse is time-independent! Applying an impulse every frame would result in a framerate-dependent force. For this reason, it should only be used when simulating one-time impacts (use the "_force" functions otherwise). + + @@ -124,6 +156,10 @@ If [code]true[/code], the [SoftBody3D] will respond to [RayCast3D]s. + + Scales the rest lengths of [SoftBody3D]'s edge constraints. Positive values shrink the mesh, while negative values expand it. For example, a value of [code]0.1[/code] shortens the edges of the mesh by 10%, while [code]-0.1[/code] expands the edges by 10%. + [b]Note:[/b] [member shrinking_factor] is best used on surface meshes with pinned points. + Increasing this value will improve the resulting simulation, but can affect performance. Use with care. diff --git a/doc/classes/SpringBoneCollisionCapsule3D.xml b/doc/classes/SpringBoneCollisionCapsule3D.xml index 9af9aca331..1e5767cf8c 100644 --- a/doc/classes/SpringBoneCollisionCapsule3D.xml +++ b/doc/classes/SpringBoneCollisionCapsule3D.xml @@ -10,11 +10,14 @@ - The capsule's height. + The capsule's full height, including the hemispheres. 
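The force/impulse distinction repeated in the SoftBody3D entries above is easiest to see as a usage pattern. A hedged sketch follows; soft_body is a placeholder SoftBody3D pointer, and the method names (apply_central_force, apply_impulse) and magnitudes are assumptions, since the signatures are not spelled out here.

// Inside a physics-tick callback: a force is time dependent, so it is re-applied every update.
soft_body->apply_central_force(Vector3(2.0, 0.0, 0.0)); // Placeholder wind push, distributed over all points.

// In a one-shot event handler (e.g. on impact): an impulse is time independent, so apply it once.
soft_body->apply_impulse(0, Vector3(0.0, 5.0, 0.0)); // Placeholder point index and impulse.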
If [code]true[/code], the collision acts to trap the joint within the collision. + + The capsule's height, excluding the hemispheres. This is the height of the central cylindrical part in the middle of the capsule, and is the distance between the centers of the two hemispheres. This is a wrapper for [member height]. + The capsule's radius. diff --git a/doc/classes/TranslationDomain.xml b/doc/classes/TranslationDomain.xml index 5045f86260..3f5873b2a1 100644 --- a/doc/classes/TranslationDomain.xml +++ b/doc/classes/TranslationDomain.xml @@ -23,6 +23,12 @@ Removes all translations. + + + + Returns the locale override of the domain. Returns an empty string if locale override is disabled. + + @@ -44,6 +50,15 @@ Removes the given translation. + + + + + Sets the locale override of the domain. + If [param locale] is an empty string, locale override is disabled. Otherwise, [param locale] will be standardized to match known locales (e.g. [code]en-US[/code] would be matched to [code]en_US[/code]). + [b]Note:[/b] Calling this method does not automatically update texts in the scene tree. Please propagate the [constant MainLoop.NOTIFICATION_TRANSLATION_CHANGED] signal manually. + + @@ -65,6 +80,9 @@ + + If [code]true[/code], translation is enabled. Otherwise, [method translate] and [method translate_plural] will return the input message unchanged regardless of the current locale. + Replace all characters with their accented variants during pseudolocalization. [b]Note:[/b] Updating this property does not automatically update texts in the scene tree. Please propagate the [constant MainLoop.NOTIFICATION_TRANSLATION_CHANGED] notification manually after you have finished modifying pseudolocalization related options. diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml index 8f82d9f587..c6e07123a5 100644 --- a/doc/classes/Viewport.xml +++ b/doc/classes/Viewport.xml @@ -598,7 +598,10 @@ Use fast approximate antialiasing. FXAA is a popular screen-space antialiasing method, which is fast but will make the image look blurry, especially at lower resolutions. It can still work relatively well at large resolutions such as 1440p and 4K. - + + Use subpixel morphological antialiasing. SMAA may produce clearer results than FXAA, but at a slightly higher performance cost. + + Represents the size of the [enum ScreenSpaceAA] enum. diff --git a/drivers/accesskit/accessibility_driver_accesskit.cpp b/drivers/accesskit/accessibility_driver_accesskit.cpp index 7ed829ebab..c68b4a90c3 100644 --- a/drivers/accesskit/accessibility_driver_accesskit.cpp +++ b/drivers/accesskit/accessibility_driver_accesskit.cpp @@ -329,10 +329,10 @@ RID AccessibilityDriverAccessKit::accessibility_create_sub_text_edit_elements(co Vector char_positions; Vector char_widths; - char_positions.resize_zeroed(t.length()); + char_positions.resize_initialized(t.length()); float *positions_ptr = char_positions.ptrw(); - char_widths.resize_zeroed(t.length()); + char_widths.resize_initialized(t.length()); float *widths_ptr = char_widths.ptrw(); float size_x = 0.0; diff --git a/drivers/d3d12/SCsub b/drivers/d3d12/SCsub index 0eee7de812..5957dbf70c 100644 --- a/drivers/d3d12/SCsub +++ b/drivers/d3d12/SCsub @@ -45,6 +45,7 @@ if env["use_pix"]: if "dcomp" in env.get("supported", []): env_d3d12_rdd.Append(CPPDEFINES=["DCOMP_ENABLED"]) + env.Append(CPPDEFINES=["DCOMP_ENABLED"]) # Used in header included in platform. # Mesa (SPIR-V to DXIL functionality). 
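The capsule documentation above (CapsuleShape2D/3D and SpringBoneCollisionCapsule3D) defines the new member as the height excluding the end caps and as a wrapper over height; assuming it is exposed as mid_height, the relationship it spells out is simply:

// Distance between the centers of the two end caps (hemispheres/semicircles).
real_t capsule_mid_height(real_t p_height, real_t p_radius) {
	return p_height - 2.0 * p_radius;
}

// And the inverse, which is effectively what writing the wrapper sets height to.
real_t capsule_height(real_t p_mid_height, real_t p_radius) {
	return p_mid_height + 2.0 * p_radius;
}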
diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index daca94d916..074b3a6dfe 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -40,7 +40,6 @@ #include "thirdparty/zlib/zlib.h" #include "d3d12_godot_nir_bridge.h" -#include "dxil_hash.h" #include "rendering_context_driver_d3d12.h" #include @@ -74,9 +73,6 @@ extern "C" { static const D3D12_RANGE VOID_RANGE = {}; -static const uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 1); -static const uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 2); - /*****************/ /**** GENERIC ****/ /*****************/ @@ -2984,109 +2980,6 @@ void RenderingDeviceDriverD3D12::framebuffer_free(FramebufferID p_framebuffer) { /**** SHADER ****/ /****************/ -static uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDevice::SHADER_STAGE_MAX] = { - /* SHADER_STAGE_VERTEX */ 0, - /* SHADER_STAGE_FRAGMENT */ 1, - /* SHADER_STAGE_TESSELATION_CONTROL */ UINT32_MAX, - /* SHADER_STAGE_TESSELATION_EVALUATION */ UINT32_MAX, - /* SHADER_STAGE_COMPUTE */ 2, -}; - -uint32_t RenderingDeviceDriverD3D12::_shader_patch_dxil_specialization_constant( - PipelineSpecializationConstantType p_type, - const void *p_value, - const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], - HashMap> &r_stages_bytecodes, - bool p_is_first_patch) { - uint32_t patch_val = 0; - switch (p_type) { - case PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT: { - uint32_t int_value = *((const int *)p_value); - ERR_FAIL_COND_V(int_value & (1 << 31), 0); - patch_val = int_value; - } break; - case PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL: { - bool bool_value = *((const bool *)p_value); - patch_val = (uint32_t)bool_value; - } break; - case PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT: { - uint32_t int_value = *((const int *)p_value); - ERR_FAIL_COND_V(int_value & (1 << 31), 0); - patch_val = (int_value >> 1); - } break; - } - // For VBR encoding to encode the number of bits we expect (32), we need to set the MSB unconditionally. - // However, signed VBR moves the MSB to the LSB, so setting the MSB to 1 wouldn't help. Therefore, - // the bit we set to 1 is the one at index 30. - patch_val |= (1 << 30); - patch_val <<= 1; // What signed VBR does. - - auto tamper_bits = [](uint8_t *p_start, uint64_t p_bit_offset, uint64_t p_tb_value) -> uint64_t { - uint64_t original = 0; - uint32_t curr_input_byte = p_bit_offset / 8; - uint8_t curr_input_bit = p_bit_offset % 8; - auto get_curr_input_bit = [&]() -> bool { - return ((p_start[curr_input_byte] >> curr_input_bit) & 1); - }; - auto move_to_next_input_bit = [&]() { - if (curr_input_bit == 7) { - curr_input_bit = 0; - curr_input_byte++; - } else { - curr_input_bit++; - } - }; - auto tamper_input_bit = [&](bool p_new_bit) { - p_start[curr_input_byte] &= ~((uint8_t)1 << curr_input_bit); - if (p_new_bit) { - p_start[curr_input_byte] |= (uint8_t)1 << curr_input_bit; - } - }; - uint8_t value_bit_idx = 0; - for (uint32_t i = 0; i < 5; i++) { // 32 bits take 5 full bytes in VBR. - for (uint32_t j = 0; j < 7; j++) { - bool input_bit = get_curr_input_bit(); - original |= (uint64_t)(input_bit ? 
1 : 0) << value_bit_idx; - tamper_input_bit((p_tb_value >> value_bit_idx) & 1); - move_to_next_input_bit(); - value_bit_idx++; - } -#ifdef DEV_ENABLED - bool input_bit = get_curr_input_bit(); - DEV_ASSERT((i < 4 && input_bit) || (i == 4 && !input_bit)); -#endif - move_to_next_input_bit(); - } - return original; - }; - uint32_t stages_patched_mask = 0; - for (int stage = 0; stage < SHADER_STAGE_MAX; stage++) { - if (!r_stages_bytecodes.has((ShaderStage)stage)) { - continue; - } - - uint64_t offset = p_stages_bit_offsets[SHADER_STAGES_BIT_OFFSET_INDICES[stage]]; - if (offset == 0) { - // This constant does not appear at this stage. - continue; - } - - Vector &bytecode = r_stages_bytecodes[(ShaderStage)stage]; -#ifdef DEV_ENABLED - uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); - // Checking against the value the NIR patch should have set. - DEV_ASSERT(!p_is_first_patch || ((orig_patch_val >> 1) & GODOT_NIR_SC_SENTINEL_MAGIC_MASK) == GODOT_NIR_SC_SENTINEL_MAGIC); - uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); - DEV_ASSERT(readback_patch_val == patch_val); -#else - tamper_bits(bytecode.ptrw(), offset, patch_val); -#endif - - stages_patched_mask |= (1 << stage); - } - return stages_patched_mask; -} - bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( const ShaderInfo *p_shader_info, VectorView p_specialization_constants, @@ -3103,7 +2996,7 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( for (const ShaderInfo::SpecializationConstant &sc : p_shader_info->specialization_constants) { if (psc.constant_id == sc.constant_id) { if (psc.int_value != sc.int_value) { - stages_re_sign_mask |= _shader_patch_dxil_specialization_constant(psc.type, &psc.int_value, sc.stages_bit_offsets, r_final_stages_bytecode, false); + stages_re_sign_mask |= RenderingDXIL::patch_specialization_constant(psc.type, &psc.int_value, sc.stages_bit_offsets, r_final_stages_bytecode, false); } break; } @@ -3114,732 +3007,45 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( ShaderStage stage = E.key; if ((stages_re_sign_mask & (1 << stage))) { Vector &bytecode = E.value; - _shader_sign_dxil_bytecode(stage, bytecode); + RenderingDXIL::sign_bytecode(stage, bytecode); } } return true; } -void RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector &r_dxil_blob) { - uint8_t *w = r_dxil_blob.ptrw(); - compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4); -} +RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { + ShaderReflection shader_refl = p_shader_container->get_shader_reflection(); + ShaderInfo shader_info_in; + const RenderingShaderContainerD3D12 *shader_container_d3d12 = Object::cast_to(p_shader_container.ptr()); + ERR_FAIL_NULL_V_MSG(shader_container_d3d12, ShaderID(), "Shader container is not a recognized format."); -String RenderingDeviceDriverD3D12::shader_get_binary_cache_key() { - return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model); -} - -Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) { - ShaderReflection shader_refl; - if (_reflect_spirv(p_spirv, shader_refl) != OK) { - return Vector(); + RenderingShaderContainerD3D12::ShaderReflectionD3D12 shader_refl_d3d12 = shader_container_d3d12->get_shader_reflection_d3d12(); + if 
(shader_refl_d3d12.dxil_push_constant_stages != 0) { + shader_info_in.dxil_push_constant_size = shader_refl.push_constant_size; } - // Collect reflection data into binary data. - ShaderBinary::Data binary_data; - Vector> sets_bindings; - Vector specialization_constants; - { - binary_data.vertex_input_mask = shader_refl.vertex_input_mask; - binary_data.fragment_output_mask = shader_refl.fragment_output_mask; - binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; - binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; - binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; - binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; - binary_data.set_count = shader_refl.uniform_sets.size(); - binary_data.push_constant_size = shader_refl.push_constant_size; - binary_data.nir_runtime_data_root_param_idx = UINT32_MAX; - binary_data.stage_count = p_spirv.size(); - - for (const Vector &spirv_set : shader_refl.uniform_sets) { - Vector bindings; - for (const ShaderUniform &spirv_uniform : spirv_set) { - ShaderBinary::DataBinding binding; - binding.type = (uint32_t)spirv_uniform.type; - binding.binding = spirv_uniform.binding; - binding.stages = (uint32_t)spirv_uniform.stages; - binding.length = spirv_uniform.length; - binding.writable = (uint32_t)spirv_uniform.writable; - bindings.push_back(binding); - } - sets_bindings.push_back(bindings); - } - - for (const ShaderSpecializationConstant &spirv_sc : shader_refl.specialization_constants) { - ShaderBinary::SpecializationConstant spec_constant; - spec_constant.type = (uint32_t)spirv_sc.type; - spec_constant.constant_id = spirv_sc.constant_id; - spec_constant.int_value = spirv_sc.int_value; - spec_constant.stage_flags = spirv_sc.stages; - specialization_constants.push_back(spec_constant); - - binary_data.spirv_specialization_constants_ids_mask |= (1 << spirv_sc.constant_id); - } - } - - // Translate SPIR-V shaders to DXIL, and collect shader info from the new representation. - HashMap> dxil_blobs; - BitField stages_processed = {}; - { - HashMap stages_nir_shaders; - - auto free_nir_shaders = [&]() { - for (KeyValue &E : stages_nir_shaders) { - ralloc_free(E.value); - } - stages_nir_shaders.clear(); - }; - - // This is based on spirv2dxil.c. May need updates when it changes. - // Also, this has to stay around until after linking. - nir_shader_compiler_options nir_options = *dxil_get_nir_compiler_options(); - nir_options.lower_base_vertex = false; - - dxil_spirv_runtime_conf dxil_runtime_conf = {}; - dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER; - dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER; - dxil_runtime_conf.zero_based_vertex_instance_id = true; - dxil_runtime_conf.zero_based_compute_workgroup_id = true; - dxil_runtime_conf.declared_read_only_images_as_srvs = true; - // Making this explicit to let maintainers know that in practice this didn't improve performance, - // probably because data generated by one shader and consumed by another one forces the resource - // to transition from UAV to SRV, and back, instead of being an UAV all the time. - // In case someone wants to try, care must be taken so in case of incompatible bindings across stages - // happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only - // uses an allegedly writable resource only for reading but the next stage doesn't. 
- dxil_runtime_conf.inferred_read_only_images_as_srvs = false; - - // - Translate SPIR-V to NIR. - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStage stage = (ShaderStage)p_spirv[i].shader_stage; - ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); - - stages_processed.set_flag(stage_flag); - - { - const char *entry_point = "main"; - - static const gl_shader_stage SPIRV_TO_MESA_STAGES[SHADER_STAGE_MAX] = { - /* SHADER_STAGE_VERTEX */ MESA_SHADER_VERTEX, - /* SHADER_STAGE_FRAGMENT */ MESA_SHADER_FRAGMENT, - /* SHADER_STAGE_TESSELATION_CONTROL */ MESA_SHADER_TESS_CTRL, - /* SHADER_STAGE_TESSELATION_EVALUATION */ MESA_SHADER_TESS_EVAL, - /* SHADER_STAGE_COMPUTE */ MESA_SHADER_COMPUTE, - }; - - nir_shader *shader = spirv_to_nir( - (const uint32_t *)p_spirv[i].spirv.ptr(), - p_spirv[i].spirv.size() / sizeof(uint32_t), - nullptr, - 0, - SPIRV_TO_MESA_STAGES[stage], - entry_point, - dxil_spirv_nir_get_spirv_options(), &nir_options); - if (!shader) { - free_nir_shaders(); - ERR_FAIL_V_MSG(Vector(), "Shader translation (step 1) at stage " + String(SHADER_STAGE_NAMES[stage]) + " failed."); - } + shader_info_in.spirv_specialization_constants_ids_mask = shader_refl_d3d12.spirv_specialization_constants_ids_mask; + shader_info_in.nir_runtime_data_root_param_idx = shader_refl_d3d12.nir_runtime_data_root_param_idx; + shader_info_in.is_compute = shader_refl.is_compute; + shader_info_in.sets.resize(shader_refl.uniform_sets.size()); + for (uint32_t i = 0; i < shader_info_in.sets.size(); i++) { + shader_info_in.sets[i].bindings.resize(shader_refl.uniform_sets[i].size()); + for (uint32_t j = 0; j < shader_info_in.sets[i].bindings.size(); j++) { + const ShaderUniform &uniform = shader_refl.uniform_sets[i][j]; + const RenderingShaderContainerD3D12::ReflectionBindingDataD3D12 &uniform_d3d12 = shader_refl_d3d12.reflection_binding_set_uniforms_d3d12[i][j]; + ShaderInfo::UniformBindingInfo &binding = shader_info_in.sets[i].bindings[j]; + binding.stages = uniform_d3d12.dxil_stages; + binding.res_class = (ResourceClass)(uniform_d3d12.resource_class); + binding.type = UniformType(uniform.type); + binding.length = uniform.length; #ifdef DEV_ENABLED - nir_validate_shader(shader, "Validate before feeding NIR to the DXIL compiler"); + binding.writable = uniform.writable; #endif - if (stage == SHADER_STAGE_VERTEX) { - dxil_runtime_conf.yz_flip.y_mask = 0xffff; - dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; - } else { - dxil_runtime_conf.yz_flip.y_mask = 0; - dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_YZ_FLIP_NONE; - } - - // This is based on spirv2dxil.c. May need updates when it changes. - dxil_spirv_nir_prep(shader); - bool requires_runtime_data = {}; - dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data); - - stages_nir_shaders[stage] = shader; - } - } - - // - Link NIR shaders. - bool can_use_multiview = D3D12Hooks::get_singleton() != nullptr; - for (int i = SHADER_STAGE_MAX - 1; i >= 0; i--) { - if (!stages_nir_shaders.has(i)) { - continue; - } - nir_shader *shader = stages_nir_shaders[i]; - nir_shader *prev_shader = nullptr; - for (int j = i - 1; j >= 0; j--) { - if (stages_nir_shaders.has(j)) { - prev_shader = stages_nir_shaders[j]; - break; - } - } - // There is a bug in the Direct3D runtime during creation of a PSO with view instancing. If a fragment - // shader uses front/back face detection (SV_IsFrontFace), its signature must include the pixel position - // builtin variable (SV_Position), otherwise an Internal Runtime error will occur. 
- if (i == SHADER_STAGE_FRAGMENT && can_use_multiview) { - const bool use_front_face = - nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_FACE) || - (shader->info.inputs_read & VARYING_BIT_FACE) || - nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRONT_FACE) || - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); - const bool use_position = - nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_POS) || - (shader->info.inputs_read & VARYING_BIT_POS) || - nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRAG_COORD) || - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); - if (use_front_face && !use_position) { - nir_variable *const pos = nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord"); - pos->data.location = VARYING_SLOT_POS; - shader->info.inputs_read |= VARYING_BIT_POS; - } - } - if (prev_shader) { - bool requires_runtime_data = {}; - dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data); - } - } - - // - Translate NIR to DXIL. - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStage stage = (ShaderStage)p_spirv[i].shader_stage; - - struct ShaderData { - ShaderStage stage; - ShaderBinary::Data &binary_data; - Vector> &sets_bindings; - Vector &specialization_constants; - } shader_data{ stage, binary_data, sets_bindings, specialization_constants }; - - GodotNirCallbacks godot_nir_callbacks = {}; - godot_nir_callbacks.data = &shader_data; - - godot_nir_callbacks.report_resource = [](uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data) { - ShaderData &shader_data_in = *(ShaderData *)p_data; - - // Types based on Mesa's dxil_container.h. - static const uint32_t DXIL_RES_SAMPLER = 1; - static const ResourceClass DXIL_TYPE_TO_CLASS[] = { - /* DXIL_RES_INVALID */ RES_CLASS_INVALID, - /* DXIL_RES_SAMPLER */ RES_CLASS_INVALID, // Handling sampler as a flag. - /* DXIL_RES_CBV */ RES_CLASS_CBV, - /* DXIL_RES_SRV_TYPED */ RES_CLASS_SRV, - /* DXIL_RES_SRV_RAW */ RES_CLASS_SRV, - /* DXIL_RES_SRV_STRUCTURED */ RES_CLASS_SRV, - /* DXIL_RES_UAV_TYPED */ RES_CLASS_UAV, - /* DXIL_RES_UAV_RAW */ RES_CLASS_UAV, - /* DXIL_RES_UAV_STRUCTURED */ RES_CLASS_UAV, - /* DXIL_RES_UAV_STRUCTURED_WITH_COUNTER */ RES_CLASS_INVALID, - }; - DEV_ASSERT(p_dxil_type < ARRAY_SIZE(DXIL_TYPE_TO_CLASS)); - ResourceClass res_class = DXIL_TYPE_TO_CLASS[p_dxil_type]; - - if (p_register == ROOT_CONSTANT_REGISTER && p_space == 0) { - DEV_ASSERT(res_class == RES_CLASS_CBV); - shader_data_in.binary_data.dxil_push_constant_stages |= (1 << shader_data_in.stage); - } else if (p_register == RUNTIME_DATA_REGISTER && p_space == 0) { - DEV_ASSERT(res_class == RES_CLASS_CBV); - shader_data_in.binary_data.nir_runtime_data_root_param_idx = 1; // Temporary, to be determined later. 
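Aside on the register scheme checked above: godot_nir packs the Vulkan-style (set, binding) pair into a single D3D shader register, and the else branch that follows decodes it back, while ROOT_CONSTANT_REGISTER and RUNTIME_DATA_REGISTER are placed at set indices MAX_UNIFORM_SETS + 1 and + 2 so they can never collide with a real uniform set. A minimal standalone sketch of that arithmetic, with placeholder multiplier values (the real GODOT_NIR_* constants live in d3d12_godot_nir_bridge.h and are not part of this diff):

    #include <cassert>
    #include <cstdint>

    // Placeholder values; the real GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER and
    // GODOT_NIR_BINDING_MULTIPLIER are defined in the NIR bridge header.
    static constexpr uint32_t SET_MULT = 1000;
    static constexpr uint32_t BINDING_MULT = 10;

    static uint32_t pack_register(uint32_t p_set, uint32_t p_binding) {
        return p_set * SET_MULT + p_binding * BINDING_MULT;
    }

    int main() {
        const uint32_t reg = pack_register(2, 7);
        assert(reg / SET_MULT == 2);                  // Recover the set index.
        assert((reg % SET_MULT) / BINDING_MULT == 7); // Recover the binding index.
        return 0;
    }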
- } else { - DEV_ASSERT(p_space == 0); - - uint32_t set = p_register / GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER; - uint32_t binding = (p_register % GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER) / GODOT_NIR_BINDING_MULTIPLIER; - - DEV_ASSERT(set < (uint32_t)shader_data_in.sets_bindings.size()); - [[maybe_unused]] bool found = false; - for (int j = 0; j < shader_data_in.sets_bindings[set].size(); j++) { - if (shader_data_in.sets_bindings[set][j].binding != binding) { - continue; - } - - ShaderBinary::DataBinding &binding_info = shader_data_in.sets_bindings.write[set].write[j]; - - binding_info.dxil_stages |= (1 << shader_data_in.stage); - - if (res_class != RES_CLASS_INVALID) { - DEV_ASSERT(binding_info.res_class == (uint32_t)RES_CLASS_INVALID || binding_info.res_class == (uint32_t)res_class); - binding_info.res_class = res_class; - } else if (p_dxil_type == DXIL_RES_SAMPLER) { - binding_info.has_sampler = (uint32_t)true; - } else { - CRASH_NOW(); - } - found = true; - break; - } - DEV_ASSERT(found); - } - }; - - godot_nir_callbacks.report_sc_bit_offset_fn = [](uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data) { - ShaderData &shader_data_in = *(ShaderData *)p_data; - [[maybe_unused]] bool found = false; - for (int j = 0; j < shader_data_in.specialization_constants.size(); j++) { - if (shader_data_in.specialization_constants[j].constant_id != p_sc_id) { - continue; - } - - uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[shader_data_in.stage]; - DEV_ASSERT(shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] == 0); - shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] = p_bit_offset; - found = true; - break; - } - DEV_ASSERT(found); - }; - - godot_nir_callbacks.report_bitcode_bit_offset_fn = [](uint64_t p_bit_offset, void *p_data) { - DEV_ASSERT(p_bit_offset % 8 == 0); - ShaderData &shader_data_in = *(ShaderData *)p_data; - uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[shader_data_in.stage]; - for (int j = 0; j < shader_data_in.specialization_constants.size(); j++) { - if (shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] == 0) { - // This SC has been optimized out from this stage. 
- continue; - } - shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] += p_bit_offset; - } - }; - - auto shader_model_d3d_to_dxil = [](D3D_SHADER_MODEL p_d3d_shader_model) -> dxil_shader_model { - static_assert(SHADER_MODEL_6_0 == 0x60000); - static_assert(SHADER_MODEL_6_3 == 0x60003); - static_assert(D3D_SHADER_MODEL_6_0 == 0x60); - static_assert(D3D_SHADER_MODEL_6_3 == 0x63); - return (dxil_shader_model)((p_d3d_shader_model >> 4) * 0x10000 + (p_d3d_shader_model & 0xf)); - }; - - nir_to_dxil_options nir_to_dxil_options = {}; - nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; - nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(shader_capabilities.shader_model); - nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION; - nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; - - dxil_logger logger = {}; - logger.log = [](void *p_priv, const char *p_msg) { -#ifdef DEBUG_ENABLED - print_verbose(p_msg); -#endif - }; - - blob dxil_blob = {}; - bool ok = nir_to_dxil(stages_nir_shaders[stage], &nir_to_dxil_options, &logger, &dxil_blob); - ralloc_free(stages_nir_shaders[stage]); - stages_nir_shaders.erase(stage); - if (!ok) { - free_nir_shaders(); - ERR_FAIL_V_MSG(Vector(), "Shader translation at stage " + String(SHADER_STAGE_NAMES[stage]) + " failed."); - } - - Vector blob_copy; - blob_copy.resize(dxil_blob.size); - memcpy(blob_copy.ptrw(), dxil_blob.data, dxil_blob.size); - blob_finish(&dxil_blob); - dxil_blobs.insert(stage, blob_copy); - } - } - -#if 0 - if (dxil_blobs.has(SHADER_STAGE_FRAGMENT)) { - Ref f = FileAccess::open("res://1.dxil", FileAccess::WRITE); - f->store_buffer(dxil_blobs[SHADER_STAGE_FRAGMENT].ptr(), dxil_blobs[SHADER_STAGE_FRAGMENT].size()); - } -#endif - - // Patch with default values of specialization constants. - if (specialization_constants.size()) { - for (const ShaderBinary::SpecializationConstant &sc : specialization_constants) { - _shader_patch_dxil_specialization_constant((PipelineSpecializationConstantType)sc.type, &sc.int_value, sc.stages_bit_offsets, dxil_blobs, true); - } -#if 0 - if (dxil_blobs.has(SHADER_STAGE_FRAGMENT)) { - Ref f = FileAccess::open("res://2.dxil", FileAccess::WRITE); - f->store_buffer(dxil_blobs[SHADER_STAGE_FRAGMENT].ptr(), dxil_blobs[SHADER_STAGE_FRAGMENT].size()); - } -#endif - } - - // Sign. - for (KeyValue> &E : dxil_blobs) { - ShaderStage stage = E.key; - Vector &dxil_blob = E.value; - _shader_sign_dxil_bytecode(stage, dxil_blob); - } - - // Build the root signature. - ComPtr root_sig_blob; - { - auto stages_to_d3d12_visibility = [](uint32_t p_stages_mask) -> D3D12_SHADER_VISIBILITY { - switch (p_stages_mask) { - case SHADER_STAGE_VERTEX_BIT: { - return D3D12_SHADER_VISIBILITY_VERTEX; - } - case SHADER_STAGE_FRAGMENT_BIT: { - return D3D12_SHADER_VISIBILITY_PIXEL; - } - default: { - return D3D12_SHADER_VISIBILITY_ALL; - } - } - }; - - LocalVector root_params; - - // Root (push) constants. - if (binary_data.dxil_push_constant_stages) { - CD3DX12_ROOT_PARAMETER1 push_constant; - push_constant.InitAsConstants( - binary_data.push_constant_size / sizeof(uint32_t), - ROOT_CONSTANT_REGISTER, - 0, - stages_to_d3d12_visibility(binary_data.dxil_push_constant_stages)); - root_params.push_back(push_constant); - } - - // NIR-DXIL runtime data. - if (binary_data.nir_runtime_data_root_param_idx == 1) { // Set above to 1 when discovering runtime data is needed. - DEV_ASSERT(!binary_data.is_compute); // Could be supported if needed, but it's pointless as of now. 
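For reference, the shader_model_d3d_to_dxil lambda above only repacks nibbles: D3D_SHADER_MODEL stores major.minor as 0xMm, while Mesa's dxil_shader_model stores it as 0xM000m, exactly what the static_asserts document. A self-contained sketch of that arithmetic (the function name here is illustrative):

    #include <cassert>
    #include <cstdint>

    // 0xMm (D3D_SHADER_MODEL) -> 0xM000m (dxil_shader_model), e.g. 0x63 -> 0x60003.
    static uint32_t d3d_model_to_dxil_model(uint32_t p_d3d_model) {
        return (p_d3d_model >> 4) * 0x10000 + (p_d3d_model & 0xf);
    }

    int main() {
        assert(d3d_model_to_dxil_model(0x60) == 0x60000); // Shader Model 6.0
        assert(d3d_model_to_dxil_model(0x63) == 0x60003); // Shader Model 6.3
        return 0;
    }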
- binary_data.nir_runtime_data_root_param_idx = root_params.size(); - CD3DX12_ROOT_PARAMETER1 nir_runtime_data; - nir_runtime_data.InitAsConstants( - sizeof(dxil_spirv_vertex_runtime_data) / sizeof(uint32_t), - RUNTIME_DATA_REGISTER, - 0, - D3D12_SHADER_VISIBILITY_VERTEX); - root_params.push_back(nir_runtime_data); - } - - // Descriptor tables (up to two per uniform set, for resources and/or samplers). - - // These have to stay around until serialization! - struct TraceableDescriptorTable { - uint32_t stages_mask = {}; - Vector ranges; - Vector root_sig_locations; - }; - Vector resource_tables_maps; - Vector sampler_tables_maps; - - for (int set = 0; set < sets_bindings.size(); set++) { - bool first_resource_in_set = true; - bool first_sampler_in_set = true; - sets_bindings.write[set].sort(); - for (int i = 0; i < sets_bindings[set].size(); i++) { - const ShaderBinary::DataBinding &binding = sets_bindings[set][i]; - - bool really_used = binding.dxil_stages != 0; -#ifdef DEV_ENABLED - bool anybody_home = (ResourceClass)binding.res_class != RES_CLASS_INVALID || binding.has_sampler; - DEV_ASSERT(anybody_home == really_used); -#endif - if (!really_used) { - continue; // Existed in SPIR-V; went away in DXIL. - } - - auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type, - uint32_t p_num_descriptors, - uint32_t p_dxil_register, - uint32_t p_dxil_stages_mask, - ShaderBinary::DataBinding::RootSignatureLocation(&p_root_sig_locations), - Vector &r_tables, - bool &r_first_in_set) { - if (r_first_in_set) { - r_tables.resize(r_tables.size() + 1); - r_first_in_set = false; - } - TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1]; - table.stages_mask |= p_dxil_stages_mask; - - CD3DX12_DESCRIPTOR_RANGE1 range; - // Due to the aliasing hack for SRV-UAV of different families, - // we can be causing an unintended change of data (sometimes the validation layers catch it). - D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; - if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_SRV || p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) { - flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; - } else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) { - flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; - } - range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags); - - table.ranges.push_back(range); - table.root_sig_locations.push_back(&p_root_sig_locations); - }; - - uint32_t num_descriptors = 1; - - D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {}; - switch ((ResourceClass)binding.res_class) { - case RES_CLASS_INVALID: { - num_descriptors = binding.length; - DEV_ASSERT(binding.has_sampler); - } break; - case RES_CLASS_CBV: { - resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - DEV_ASSERT(!binding.has_sampler); - } break; - case RES_CLASS_SRV: { - resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - num_descriptors = MAX(1u, binding.length); // An unbound R/O buffer is reflected as zero-size. - } break; - case RES_CLASS_UAV: { - resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - num_descriptors = MAX(1u, binding.length); // An unbound R/W buffer is reflected as zero-size. 
- DEV_ASSERT(!binding.has_sampler); - } break; - } - - uint32_t dxil_register = set * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + binding.binding * GODOT_NIR_BINDING_MULTIPLIER; - - if (binding.res_class != RES_CLASS_INVALID) { - insert_range( - resource_range_type, - num_descriptors, - dxil_register, - sets_bindings[set][i].dxil_stages, - sets_bindings.write[set].write[i].root_sig_locations[RS_LOC_TYPE_RESOURCE], - resource_tables_maps, - first_resource_in_set); - } - if (binding.has_sampler) { - insert_range( - D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, - num_descriptors, - dxil_register, - sets_bindings[set][i].dxil_stages, - sets_bindings.write[set].write[i].root_sig_locations[RS_LOC_TYPE_SAMPLER], - sampler_tables_maps, - first_sampler_in_set); - } - } - } - - auto make_descriptor_tables = [&root_params, &stages_to_d3d12_visibility](const Vector &p_tables) { - for (const TraceableDescriptorTable &table : p_tables) { - D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask); - DEV_ASSERT(table.ranges.size() == table.root_sig_locations.size()); - for (int i = 0; i < table.ranges.size(); i++) { - // By now we know very well which root signature location corresponds to the pointed uniform. - table.root_sig_locations[i]->root_param_idx = root_params.size(); - table.root_sig_locations[i]->range_idx = i; - } - - CD3DX12_ROOT_PARAMETER1 root_table; - root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility); - root_params.push_back(root_table); - } - }; - - make_descriptor_tables(resource_tables_maps); - make_descriptor_tables(sampler_tables_maps); - - CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {}; - D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags = - D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; - if (!stages_processed.has_flag(SHADER_STAGE_VERTEX_BIT)) { - root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS; - } - if (!stages_processed.has_flag(SHADER_STAGE_FRAGMENT_BIT)) { - root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; - } - if (binary_data.vertex_input_mask) { - root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - } - root_sig_desc.Init_1_1(root_params.size(), root_params.ptr(), 0, nullptr, root_sig_flags); - - ComPtr error_blob; - HRESULT res = D3DX12SerializeVersionedRootSignature(context_driver->lib_d3d12, &root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, root_sig_blob.GetAddressOf(), error_blob.GetAddressOf()); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), Vector(), - "Serialization of root signature failed with error " + vformat("0x%08ux", (uint64_t)res) + " and the following message:\n" + String::ascii(Span((char *)error_blob->GetBufferPointer(), error_blob->GetBufferSize()))); - - binary_data.root_signature_crc = crc32(0, nullptr, 0); - binary_data.root_signature_crc = crc32(binary_data.root_signature_crc, (const Bytef *)root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize()); - } - - Vector> compressed_stages; - Vector zstd_size; - - uint32_t stages_binary_size = 0; - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - Vector zstd; - Vector &dxil_blob = dxil_blobs[p_spirv[i].shader_stage]; - zstd.resize(Compression::get_max_compressed_buffer_size(dxil_blob.size(), Compression::MODE_ZSTD)); - int 
dst_size = Compression::compress(zstd.ptrw(), dxil_blob.ptr(), dxil_blob.size(), Compression::MODE_ZSTD); - - zstd_size.push_back(dst_size); - zstd.resize(dst_size); - compressed_stages.push_back(zstd); - - uint32_t s = compressed_stages[i].size(); - stages_binary_size += STEPIFY(s, 4); - } - - CharString shader_name_utf = p_shader_name.utf8(); - - binary_data.shader_name_len = shader_name_utf.length(); - - uint32_t total_size = sizeof(uint32_t) * 3; // Header + version + main datasize;. - total_size += sizeof(ShaderBinary::Data); - - total_size += STEPIFY(binary_data.shader_name_len, 4); - - for (int i = 0; i < sets_bindings.size(); i++) { - total_size += sizeof(uint32_t); - total_size += sets_bindings[i].size() * sizeof(ShaderBinary::DataBinding); - } - - total_size += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - - total_size += compressed_stages.size() * sizeof(uint32_t) * 3; // Sizes. - total_size += stages_binary_size; - - binary_data.root_signature_len = root_sig_blob->GetBufferSize(); - total_size += binary_data.root_signature_len; - - Vector ret; - ret.resize(total_size); - { - uint32_t offset = 0; - uint8_t *binptr = ret.ptrw(); - binptr[0] = 'G'; - binptr[1] = 'S'; - binptr[2] = 'B'; - binptr[3] = 'D'; // Redot shader binary data. - offset += 4; - encode_uint32(ShaderBinary::VERSION, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(sizeof(ShaderBinary::Data), binptr + offset); - offset += sizeof(uint32_t); - memcpy(binptr + offset, &binary_data, sizeof(ShaderBinary::Data)); - offset += sizeof(ShaderBinary::Data); - -#define ADVANCE_OFFSET_WITH_ALIGNMENT(m_bytes) \ - { \ - offset += m_bytes; \ - uint32_t padding = STEPIFY(m_bytes, 4) - m_bytes; \ - memset(binptr + offset, 0, padding); /* Avoid garbage data. 
*/ \ - offset += padding; \ - } - - if (binary_data.shader_name_len > 0) { - memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len); - ADVANCE_OFFSET_WITH_ALIGNMENT(binary_data.shader_name_len); - } - - for (int i = 0; i < sets_bindings.size(); i++) { - int count = sets_bindings[i].size(); - encode_uint32(count, binptr + offset); - offset += sizeof(uint32_t); - if (count > 0) { - memcpy(binptr + offset, sets_bindings[i].ptr(), sizeof(ShaderBinary::DataBinding) * count); - offset += sizeof(ShaderBinary::DataBinding) * count; - } - } - - if (specialization_constants.size()) { - memcpy(binptr + offset, specialization_constants.ptr(), sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size()); - offset += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - } - - for (int i = 0; i < compressed_stages.size(); i++) { - encode_uint32(p_spirv[i].shader_stage, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(dxil_blobs[p_spirv[i].shader_stage].size(), binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(zstd_size[i], binptr + offset); - offset += sizeof(uint32_t); - memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size()); - ADVANCE_OFFSET_WITH_ALIGNMENT(compressed_stages[i].size()); - } - - memcpy(binptr + offset, root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize()); - offset += root_sig_blob->GetBufferSize(); - - ERR_FAIL_COND_V(offset != (uint32_t)ret.size(), Vector()); - } - - return ret; -} - -RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) { - r_shader_desc = {}; // Driver-agnostic. - ShaderInfo shader_info_in; // Driver-specific. - - const uint8_t *binptr = p_shader_binary.ptr(); - uint32_t binsize = p_shader_binary.size(); - - uint32_t read_offset = 0; - - // Consistency check. - ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(ShaderBinary::Data), ShaderID()); - ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', ShaderID()); - - uint32_t bin_version = decode_uint32(binptr + 4); - ERR_FAIL_COND_V(bin_version != ShaderBinary::VERSION, ShaderID()); - - uint32_t bin_data_size = decode_uint32(binptr + 8); - - const ShaderBinary::Data &binary_data = *(reinterpret_cast(binptr + 12)); - - r_shader_desc.push_constant_size = binary_data.push_constant_size; - shader_info_in.dxil_push_constant_size = binary_data.dxil_push_constant_stages ? 
binary_data.push_constant_size : 0; - shader_info_in.nir_runtime_data_root_param_idx = binary_data.nir_runtime_data_root_param_idx; - - r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; - r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - - r_shader_desc.is_compute = binary_data.is_compute; - shader_info_in.is_compute = binary_data.is_compute; - r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; - r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; - r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; - - read_offset += sizeof(uint32_t) * 3 + bin_data_size; - - if (binary_data.shader_name_len) { - r_name.clear(); - r_name.append_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len); - read_offset += STEPIFY(binary_data.shader_name_len, 4); - } - - r_shader_desc.uniform_sets.resize(binary_data.set_count); - shader_info_in.sets.resize(binary_data.set_count); - - for (uint32_t i = 0; i < binary_data.set_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, ShaderID()); - uint32_t set_count = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - const ShaderBinary::DataBinding *set_ptr = reinterpret_cast(binptr + read_offset); - uint32_t set_size = set_count * sizeof(ShaderBinary::DataBinding); - ERR_FAIL_COND_V(read_offset + set_size >= binsize, ShaderID()); - - shader_info_in.sets[i].bindings.reserve(set_count); - - for (uint32_t j = 0; j < set_count; j++) { - ShaderUniform info; - info.type = UniformType(set_ptr[j].type); - info.writable = set_ptr[j].writable; - info.length = set_ptr[j].length; - info.binding = set_ptr[j].binding; - - ShaderInfo::UniformBindingInfo binding; - binding.stages = set_ptr[j].dxil_stages; - binding.res_class = (ResourceClass)set_ptr[j].res_class; - binding.type = info.type; - binding.length = info.length; -#ifdef DEV_ENABLED - binding.writable = set_ptr[j].writable; -#endif - static_assert(sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations) == sizeof(ShaderBinary::DataBinding::root_sig_locations)); - memcpy((void *)&binding.root_sig_locations, (void *)&set_ptr[j].root_sig_locations, sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations)); + static_assert(sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations) == sizeof(RenderingShaderContainerD3D12::ReflectionBindingDataD3D12::root_signature_locations)); + memcpy((void *)&binding.root_sig_locations, (void *)&uniform_d3d12.root_signature_locations, sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations)); if (binding.root_sig_locations.resource.root_param_idx != UINT32_MAX) { shader_info_in.sets[i].num_root_params.resources++; @@ -3847,80 +3053,50 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect if (binding.root_sig_locations.sampler.root_param_idx != UINT32_MAX) { shader_info_in.sets[i].num_root_params.samplers++; } - - r_shader_desc.uniform_sets.write[i].push_back(info); - shader_info_in.sets[i].bindings.push_back(binding); } - - read_offset += set_size; } - ERR_FAIL_COND_V(read_offset + binary_data.specialization_constants_count * sizeof(ShaderBinary::SpecializationConstant) >= binsize, ShaderID()); - - r_shader_desc.specialization_constants.resize(binary_data.specialization_constants_count); - shader_info_in.specialization_constants.resize(binary_data.specialization_constants_count); - for (uint32_t i = 0; i < binary_data.specialization_constants_count; i++) { - const 
ShaderBinary::SpecializationConstant &src_sc = *(reinterpret_cast(binptr + read_offset)); - ShaderSpecializationConstant sc; - sc.type = PipelineSpecializationConstantType(src_sc.type); + shader_info_in.specialization_constants.resize(shader_refl.specialization_constants.size()); + for (uint32_t i = 0; i < shader_info_in.specialization_constants.size(); i++) { + ShaderInfo::SpecializationConstant &sc = shader_info_in.specialization_constants[i]; + const ShaderSpecializationConstant &src_sc = shader_refl.specialization_constants[i]; + const RenderingShaderContainerD3D12::ReflectionSpecializationDataD3D12 &src_sc_d3d12 = shader_refl_d3d12.reflection_specialization_data_d3d12[i]; sc.constant_id = src_sc.constant_id; sc.int_value = src_sc.int_value; - sc.stages = src_sc.stage_flags; - r_shader_desc.specialization_constants.write[i] = sc; - - ShaderInfo::SpecializationConstant ssc; - ssc.constant_id = src_sc.constant_id; - ssc.int_value = src_sc.int_value; - memcpy(ssc.stages_bit_offsets, src_sc.stages_bit_offsets, sizeof(ssc.stages_bit_offsets)); - shader_info_in.specialization_constants[i] = ssc; - - read_offset += sizeof(ShaderBinary::SpecializationConstant); - } - shader_info_in.spirv_specialization_constants_ids_mask = binary_data.spirv_specialization_constants_ids_mask; - - for (uint32_t i = 0; i < binary_data.stage_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID()); - - uint32_t stage = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t dxil_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t zstd_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - - // Decompress. - Vector dxil; - dxil.resize(dxil_size); - int dec_dxil_size = Compression::decompress(dxil.ptrw(), dxil.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD); - ERR_FAIL_COND_V(dec_dxil_size != (int32_t)dxil_size, ShaderID()); - shader_info_in.stages_bytecode[ShaderStage(stage)] = dxil; - - zstd_size = STEPIFY(zstd_size, 4); - read_offset += zstd_size; - ERR_FAIL_COND_V(read_offset > binsize, ShaderID()); - - r_shader_desc.stages.push_back(ShaderStage(stage)); + memcpy(sc.stages_bit_offsets, src_sc_d3d12.stages_bit_offsets, sizeof(sc.stages_bit_offsets)); } - const uint8_t *root_sig_data_ptr = binptr + read_offset; + Vector decompressed_code; + for (uint32_t i = 0; i < shader_refl.stages_vector.size(); i++) { + const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i]; + bool requires_decompression = (shader.code_decompressed_size > 0); + if (requires_decompression) { + decompressed_code.resize(shader.code_decompressed_size); + bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size()); + ERR_FAIL_COND_V_MSG(!decompressed, ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]))); + } + + if (requires_decompression) { + shader_info_in.stages_bytecode[shader.shader_stage] = decompressed_code; + } else { + shader_info_in.stages_bytecode[shader.shader_stage] = shader.code_compressed_bytes; + } + } PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER d3d_D3D12CreateRootSignatureDeserializer = (PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12CreateRootSignatureDeserializer"); 
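The decompress_code call above is the container-side counterpart of the per-stage zstd step the removed shader_compile_binary_from_spirv path performed. A minimal sketch of that round trip, mirroring the Compression calls that appear in this diff (note the uncompressed size must be stored next to the compressed bytes, since decompression is given the destination size up front):

    #include "core/io/compression.h"
    #include "core/templates/vector.h"

    // Compress a DXIL blob with zstd; the caller must remember p_blob.size(),
    // just as the old ShaderBinary stored it alongside the compressed size.
    static Vector<uint8_t> compress_blob(const Vector<uint8_t> &p_blob) {
        Vector<uint8_t> zstd;
        zstd.resize(Compression::get_max_compressed_buffer_size(p_blob.size(), Compression::MODE_ZSTD));
        const int dst_size = Compression::compress(zstd.ptrw(), p_blob.ptr(), p_blob.size(), Compression::MODE_ZSTD);
        zstd.resize(dst_size);
        return zstd;
    }

    static Vector<uint8_t> decompress_blob(const Vector<uint8_t> &p_zstd, int p_original_size) {
        Vector<uint8_t> blob;
        blob.resize(p_original_size);
        Compression::decompress(blob.ptrw(), blob.size(), p_zstd.ptr(), p_zstd.size(), Compression::MODE_ZSTD);
        return blob;
    }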
ERR_FAIL_NULL_V(d3d_D3D12CreateRootSignatureDeserializer, ShaderID()); - HRESULT res = d3d_D3D12CreateRootSignatureDeserializer(root_sig_data_ptr, binary_data.root_signature_len, IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf())); + HRESULT res = d3d_D3D12CreateRootSignatureDeserializer(shader_refl_d3d12.root_signature_bytes.ptr(), shader_refl_d3d12.root_signature_bytes.size(), IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf())); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "D3D12CreateRootSignatureDeserializer failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - read_offset += binary_data.root_signature_len; - - ERR_FAIL_COND_V(read_offset != binsize, ShaderID()); ComPtr root_signature; - res = device->CreateRootSignature(0, root_sig_data_ptr, binary_data.root_signature_len, IID_PPV_ARGS(shader_info_in.root_signature.GetAddressOf())); + res = device->CreateRootSignature(0, shader_refl_d3d12.root_signature_bytes.ptr(), shader_refl_d3d12.root_signature_bytes.size(), IID_PPV_ARGS(shader_info_in.root_signature.GetAddressOf())); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "CreateRootSignature failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + shader_info_in.root_signature_desc = shader_info_in.root_signature_deserializer->GetRootSignatureDesc(); - shader_info_in.root_signature_crc = binary_data.root_signature_crc; + shader_info_in.root_signature_crc = shader_refl_d3d12.root_signature_crc; // Bookkeep. - ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); *shader_info_ptr = shader_info_in; return ShaderID(shader_info_ptr); @@ -6445,6 +5621,10 @@ const RDD::Capabilities &RenderingDeviceDriverD3D12::get_capabilities() const { return device_capabilities; } +const RenderingShaderContainerFormat &RenderingDeviceDriverD3D12::get_shader_container_format() const { + return shader_container_format; +} + bool RenderingDeviceDriverD3D12::is_composite_alpha_supported(CommandQueueID p_queue) const { if (has_comp_alpha.has((uint64_t)p_queue.id)) { return has_comp_alpha[(uint64_t)p_queue.id]; @@ -6666,6 +5846,8 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { print_verbose(" model: " + D3D_SHADER_MODEL_TO_STRING(shader_capabilities.shader_model)); } + shader_container_format.set_lib_d3d12(context_driver->lib_d3d12); + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); if (SUCCEEDED(res)) { diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 335611efac..7a6bc59ed6 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -35,6 +35,7 @@ #include "core/templates/hash_map.h" #include "core/templates/paged_allocator.h" #include "core/templates/self_list.h" +#include "rendering_shader_container_d3d12.h" #include "servers/rendering/rendering_device_driver.h" #ifndef _MSC_VER @@ -56,8 +57,6 @@ using Microsoft::WRL::ComPtr; -#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 - #ifdef DEV_ENABLED #define CUSTOM_INFO_QUEUE_ENABLED 0 #endif @@ -133,6 +132,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { FormatCapabilities format_capabilities; BarrierCapabilities barrier_capabilities; MiscFeaturesSupport misc_features_support; + RenderingShaderContainerFormatD3D12 shader_container_format; String pipeline_cache_id; class DescriptorsHeap { @@ -520,6 +520,7 @@ public: /****************/ /**** 
SHADER ****/ /****************/ + private: static const uint32_t ROOT_SIGNATURE_SIZE = 256; static const uint32_t PUSH_CONSTANT_SIZE = 128; // Mimicking Vulkan. @@ -537,82 +538,6 @@ private: MAX_UNIFORM_SETS = (ROOT_SIGNATURE_SIZE - PUSH_CONSTANT_SIZE) / sizeof(uint32_t), }; - enum RootSignatureLocationType { - RS_LOC_TYPE_RESOURCE, - RS_LOC_TYPE_SAMPLER, - }; - - enum ResourceClass { - RES_CLASS_INVALID, - RES_CLASS_CBV, - RES_CLASS_SRV, - RES_CLASS_UAV, - }; - - struct ShaderBinary { - // Version 1: Initial. - // Version 2: 64-bit vertex input mask. - // Version 3: Added SC stage mask. - static const uint32_t VERSION = 3; - - // Phase 1: SPIR-V reflection, where the Vulkan/RD interface of the shader is discovered. - // Phase 2: SPIR-V to DXIL translation, where the DXIL interface is discovered, which may have gaps due to optimizations. - - struct DataBinding { - // - Phase 1. - uint32_t type = 0; - uint32_t binding = 0; - uint32_t stages = 0; - uint32_t length = 0; // Size of arrays (in total elements), or ubos (in bytes * total elements). - uint32_t writable = 0; - // - Phase 2. - uint32_t res_class = 0; - uint32_t has_sampler = 0; - uint32_t dxil_stages = 0; - struct RootSignatureLocation { - uint32_t root_param_idx = UINT32_MAX; // UINT32_MAX if unused. - uint32_t range_idx = UINT32_MAX; // UINT32_MAX if unused. - }; - RootSignatureLocation root_sig_locations[2]; // Index is RootSignatureLocationType. - - // We need to sort these to fill the root signature locations properly. - bool operator<(const DataBinding &p_other) const { - return binding < p_other.binding; - } - }; - - struct SpecializationConstant { - // - Phase 1. - uint32_t type = 0; - uint32_t constant_id = 0; - union { - uint32_t int_value = 0; - float float_value; - bool bool_value; - }; - uint32_t stage_flags = 0; - // - Phase 2. - uint64_t stages_bit_offsets[D3D12_BITCODE_OFFSETS_NUM_STAGES] = {}; - }; - - struct Data { - uint64_t vertex_input_mask = 0; - uint32_t fragment_output_mask = 0; - uint32_t specialization_constants_count = 0; - uint32_t spirv_specialization_constants_ids_mask = 0; - uint32_t is_compute = 0; - uint32_t compute_local_size[3] = {}; - uint32_t set_count = 0; - uint32_t push_constant_size = 0; - uint32_t dxil_push_constant_stages = 0; // Phase 2. - uint32_t nir_runtime_data_root_param_idx = 0; // Phase 2. 
- uint32_t stage_count = 0; - uint32_t shader_name_len = 0; - uint32_t root_signature_len = 0; - uint32_t root_signature_crc = 0; - }; - }; - struct ShaderInfo { uint32_t dxil_push_constant_size = 0; uint32_t nir_runtime_data_root_param_idx = UINT32_MAX; @@ -663,22 +588,13 @@ private: uint32_t root_signature_crc = 0; }; - uint32_t _shader_patch_dxil_specialization_constant( - PipelineSpecializationConstantType p_type, - const void *p_value, - const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], - HashMap> &r_stages_bytecodes, - bool p_is_first_patch); bool _shader_apply_specialization_constants( const ShaderInfo *p_shader_info, VectorView p_specialization_constants, HashMap> &r_final_stages_bytecode); - void _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector &r_dxil_blob); public: - virtual String shader_get_binary_cache_key() override final; - virtual Vector shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) override final; - virtual ShaderID shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) override final; + virtual ShaderID shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) override final; virtual uint32_t shader_get_layout_hash(ShaderID p_shader) override final; virtual void shader_free(ShaderID p_shader) override final; virtual void shader_destroy_modules(ShaderID p_shader) override final; @@ -981,6 +897,7 @@ public: virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; virtual const Capabilities &get_capabilities() const override final; + virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final; virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final; diff --git a/drivers/d3d12/rendering_shader_container_d3d12.cpp b/drivers/d3d12/rendering_shader_container_d3d12.cpp new file mode 100644 index 0000000000..df0bb2dede --- /dev/null +++ b/drivers/d3d12/rendering_shader_container_d3d12.cpp @@ -0,0 +1,914 @@ +/**************************************************************************/ +/* rendering_shader_container_d3d12.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* REDOT ENGINE */ +/* https://redotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_shader_container_d3d12.h" + +#include "core/templates/sort_array.h" + +#include "dxil_hash.h" + +#include + +#ifndef _MSC_VER +// Match current version used by MinGW, MSVC and Direct3D 12 headers use 500. +#define __REQUIRED_RPCNDR_H_VERSION__ 475 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wswitch" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" +#endif + +#include "d3dx12.h" +#include +#define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED +#include "D3D12MemAlloc.h" + +#include + +#if defined(_MSC_VER) && defined(MemoryBarrier) +// Annoying define from winnt.h. Reintroduced by some of the headers above. +#undef MemoryBarrier +#endif + +// No point in fighting warnings in Mesa. +#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4200) // "nonstandard extension used: zero-sized array in struct/union". +#pragma warning(disable : 4806) // "'&': unsafe operation: no value of type 'bool' promoted to type 'uint32_t' can equal the given constant". 
+#endif + +#include "nir_spirv.h" +#include "nir_to_dxil.h" +#include "spirv_to_dxil.h" +extern "C" { +#include "dxil_spirv_nir.h" +} + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop +#endif + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) { + switch (p_stages_mask) { + case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT: + return D3D12_SHADER_VISIBILITY_VERTEX; + case RenderingDeviceCommons::SHADER_STAGE_FRAGMENT_BIT: + return D3D12_SHADER_VISIBILITY_PIXEL; + default: + return D3D12_SHADER_VISIBILITY_ALL; + } +} + +uint32_t RenderingDXIL::patch_specialization_constant( + RenderingDeviceCommons::PipelineSpecializationConstantType p_type, + const void *p_value, + const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], + HashMap> &r_stages_bytecodes, + bool p_is_first_patch) { + uint32_t patch_val = 0; + switch (p_type) { + case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT: { + uint32_t int_value = *((const int *)p_value); + ERR_FAIL_COND_V(int_value & (1 << 31), 0); + patch_val = int_value; + } break; + case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL: { + bool bool_value = *((const bool *)p_value); + patch_val = (uint32_t)bool_value; + } break; + case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT: { + uint32_t int_value = *((const int *)p_value); + ERR_FAIL_COND_V(int_value & (1 << 31), 0); + patch_val = (int_value >> 1); + } break; + } + // For VBR encoding to encode the number of bits we expect (32), we need to set the MSB unconditionally. + // However, signed VBR moves the MSB to the LSB, so setting the MSB to 1 wouldn't help. Therefore, + // the bit we set to 1 is the one at index 30. + patch_val |= (1 << 30); + patch_val <<= 1; // What signed VBR does. + + auto tamper_bits = [](uint8_t *p_start, uint64_t p_bit_offset, uint64_t p_tb_value) -> uint64_t { + uint64_t original = 0; + uint32_t curr_input_byte = p_bit_offset / 8; + uint8_t curr_input_bit = p_bit_offset % 8; + auto get_curr_input_bit = [&]() -> bool { + return ((p_start[curr_input_byte] >> curr_input_bit) & 1); + }; + auto move_to_next_input_bit = [&]() { + if (curr_input_bit == 7) { + curr_input_bit = 0; + curr_input_byte++; + } else { + curr_input_bit++; + } + }; + auto tamper_input_bit = [&](bool p_new_bit) { + p_start[curr_input_byte] &= ~((uint8_t)1 << curr_input_bit); + if (p_new_bit) { + p_start[curr_input_byte] |= (uint8_t)1 << curr_input_bit; + } + }; + uint8_t value_bit_idx = 0; + for (uint32_t i = 0; i < 5; i++) { // 32 bits take 5 full bytes in VBR. + for (uint32_t j = 0; j < 7; j++) { + bool input_bit = get_curr_input_bit(); + original |= (uint64_t)(input_bit ? 
1 : 0) << value_bit_idx; + tamper_input_bit((p_tb_value >> value_bit_idx) & 1); + move_to_next_input_bit(); + value_bit_idx++; + } +#ifdef DEV_ENABLED + bool input_bit = get_curr_input_bit(); + DEV_ASSERT((i < 4 && input_bit) || (i == 4 && !input_bit)); +#endif + move_to_next_input_bit(); + } + return original; + }; + uint32_t stages_patched_mask = 0; + for (int stage = 0; stage < RenderingDeviceCommons::SHADER_STAGE_MAX; stage++) { + if (!r_stages_bytecodes.has((RenderingDeviceCommons::ShaderStage)stage)) { + continue; + } + + uint64_t offset = p_stages_bit_offsets[RenderingShaderContainerD3D12::SHADER_STAGES_BIT_OFFSET_INDICES[stage]]; + if (offset == 0) { + // This constant does not appear at this stage. + continue; + } + + Vector &bytecode = r_stages_bytecodes[(RenderingDeviceCommons::ShaderStage)stage]; +#ifdef DEV_ENABLED + uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); + // Checking against the value the NIR patch should have set. + DEV_ASSERT(!p_is_first_patch || ((orig_patch_val >> 1) & GODOT_NIR_SC_SENTINEL_MAGIC_MASK) == GODOT_NIR_SC_SENTINEL_MAGIC); + uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); + DEV_ASSERT(readback_patch_val == patch_val); +#else + tamper_bits(bytecode.ptrw(), offset, patch_val); +#endif + + stages_patched_mask |= (1 << stage); + } + + return stages_patched_mask; +} + +void RenderingDXIL::sign_bytecode(RenderingDeviceCommons::ShaderStage p_stage, Vector &r_dxil_blob) { + uint8_t *w = r_dxil_blob.ptrw(); + compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4); +} + +// RenderingShaderContainerD3D12 + +uint32_t RenderingShaderContainerD3D12::_format() const { + return 0x43443344; +} + +uint32_t RenderingShaderContainerD3D12::_format_version() const { + return FORMAT_VERSION; +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) { + reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes); + return sizeof(ReflectionDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) { + reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + reflection_binding_set_uniforms_data_d3d12.ptrw()[p_index] = *(const ReflectionBindingDataD3D12 *)(p_bytes); + return sizeof(ReflectionBindingDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) { + reflection_specialization_data_d3d12.resize(reflection_specialization_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + reflection_specialization_data_d3d12.ptrw()[p_index] = *(const ReflectionSpecializationDataD3D12 *)(p_bytes); + return sizeof(ReflectionSpecializationDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint8_t *p_bytes) { + ContainerFooterD3D12 footer = *(const ContainerFooterD3D12 *)(p_bytes); + root_signature_crc = footer.root_signature_crc; + root_signature_bytes.resize(footer.root_signature_length); + memcpy(root_signature_bytes.ptrw(), p_bytes + sizeof(ContainerFooterD3D12), root_signature_bytes.size()); + return sizeof(ContainerFooterD3D12) + footer.root_signature_length; +} + +uint32_t 
RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const { + if (p_bytes != nullptr) { + *(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12; + } + + return sizeof(ReflectionDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(ReflectionBindingDataD3D12 *)(p_bytes) = reflection_binding_set_uniforms_data_d3d12[p_index]; + } + + return sizeof(ReflectionBindingDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(ReflectionSpecializationDataD3D12 *)(p_bytes) = reflection_specialization_data_d3d12[p_index]; + } + + return sizeof(ReflectionSpecializationDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_footer_extra_data(uint8_t *p_bytes) const { + if (p_bytes != nullptr) { + ContainerFooterD3D12 &footer = *(ContainerFooterD3D12 *)(p_bytes); + footer.root_signature_length = root_signature_bytes.size(); + footer.root_signature_crc = root_signature_crc; + memcpy(p_bytes + sizeof(ContainerFooterD3D12), root_signature_bytes.ptr(), root_signature_bytes.size()); + } + + return sizeof(ContainerFooterD3D12) + root_signature_bytes.size(); +} + +#if NIR_ENABLED +bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(const Vector &p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed) { + r_stages_processed.clear(); + + dxil_spirv_runtime_conf dxil_runtime_conf = {}; + dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER; + dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER; + dxil_runtime_conf.zero_based_vertex_instance_id = true; + dxil_runtime_conf.zero_based_compute_workgroup_id = true; + dxil_runtime_conf.declared_read_only_images_as_srvs = true; + + // Making this explicit to let maintainers know that in practice this didn't improve performance, + // probably because data generated by one shader and consumed by another one forces the resource + // to transition from UAV to SRV, and back, instead of being an UAV all the time. + // In case someone wants to try, care must be taken so in case of incompatible bindings across stages + // happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only + // uses an allegedly writable resource only for reading but the next stage doesn't. + dxil_runtime_conf.inferred_read_only_images_as_srvs = false; + + // Translate SPIR-V to NIR. 
+ for (int64_t i = 0; i < p_spirv.size(); i++) { + RenderingDeviceCommons::ShaderStage stage = p_spirv[i].shader_stage; + RenderingDeviceCommons::ShaderStage stage_flag = (RenderingDeviceCommons::ShaderStage)(1 << stage); + r_stages.push_back(stage); + r_stages_processed.set_flag(stage_flag); + + const char *entry_point = "main"; + static const gl_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = { + MESA_SHADER_VERTEX, // SHADER_STAGE_VERTEX + MESA_SHADER_FRAGMENT, // SHADER_STAGE_FRAGMENT + MESA_SHADER_TESS_CTRL, // SHADER_STAGE_TESSELATION_CONTROL + MESA_SHADER_TESS_EVAL, // SHADER_STAGE_TESSELATION_EVALUATION + MESA_SHADER_COMPUTE, // SHADER_STAGE_COMPUTE + }; + + nir_shader *shader = spirv_to_nir( + (const uint32_t *)(p_spirv[i].spirv.ptr()), + p_spirv[i].spirv.size() / sizeof(uint32_t), + nullptr, + 0, + SPIRV_TO_MESA_STAGES[stage], + entry_point, + dxil_spirv_nir_get_spirv_options(), + p_compiler_options); + + ERR_FAIL_NULL_V_MSG(shader, false, "Shader translation (step 1) at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed."); + +#ifdef DEV_ENABLED + nir_validate_shader(shader, "Validate before feeding NIR to the DXIL compiler"); +#endif + + if (stage == RenderingDeviceCommons::SHADER_STAGE_VERTEX) { + dxil_runtime_conf.yz_flip.y_mask = 0xffff; + dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; + } else { + dxil_runtime_conf.yz_flip.y_mask = 0; + dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_YZ_FLIP_NONE; + } + + dxil_spirv_nir_prep(shader); + bool requires_runtime_data = false; + dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data); + + r_stages_nir_shaders[stage] = shader; + } + + // Link NIR shaders. + for (int i = RenderingDeviceCommons::SHADER_STAGE_MAX - 1; i >= 0; i--) { + if (!r_stages_nir_shaders.has(i)) { + continue; + } + nir_shader *shader = r_stages_nir_shaders[i]; + nir_shader *prev_shader = nullptr; + for (int j = i - 1; j >= 0; j--) { + if (r_stages_nir_shaders.has(j)) { + prev_shader = r_stages_nir_shaders[j]; + break; + } + } + // There is a bug in the Direct3D runtime during creation of a PSO with view instancing. If a fragment + // shader uses front/back face detection (SV_IsFrontFace), its signature must include the pixel position + // builtin variable (SV_Position), otherwise an Internal Runtime error will occur. 
+ if (i == RenderingDeviceCommons::SHADER_STAGE_FRAGMENT) { + const bool use_front_face = + nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_FACE) || + (shader->info.inputs_read & VARYING_BIT_FACE) || + nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRONT_FACE) || + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); + const bool use_position = + nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_POS) || + (shader->info.inputs_read & VARYING_BIT_POS) || + nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRAG_COORD) || + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); + if (use_front_face && !use_position) { + nir_variable *const pos = nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord"); + pos->data.location = VARYING_SLOT_POS; + shader->info.inputs_read |= VARYING_BIT_POS; + } + } + if (prev_shader) { + bool requires_runtime_data = {}; + dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data); + } + } + + return true; +} + +struct GodotNirCallbackUserData { + RenderingShaderContainerD3D12 *container; + RenderingDeviceCommons::ShaderStage stage; +}; + +static dxil_shader_model shader_model_d3d_to_dxil(D3D_SHADER_MODEL p_d3d_shader_model) { + static_assert(SHADER_MODEL_6_0 == 0x60000); + static_assert(SHADER_MODEL_6_3 == 0x60003); + static_assert(D3D_SHADER_MODEL_6_0 == 0x60); + static_assert(D3D_SHADER_MODEL_6_3 == 0x63); + return (dxil_shader_model)((p_d3d_shader_model >> 4) * 0x10000 + (p_d3d_shader_model & 0xf)); +} + +bool RenderingShaderContainerD3D12::_convert_nir_to_dxil(const HashMap &p_stages_nir_shaders, BitField p_stages_processed, HashMap> &r_dxil_blobs) { + // Translate NIR to DXIL. 
+ for (KeyValue it : p_stages_nir_shaders) { + RenderingDeviceCommons::ShaderStage stage = (RenderingDeviceCommons::ShaderStage)(it.key); + GodotNirCallbackUserData godot_nir_callback_user_data; + godot_nir_callback_user_data.container = this; + godot_nir_callback_user_data.stage = stage; + + GodotNirCallbacks godot_nir_callbacks = {}; + godot_nir_callbacks.data = &godot_nir_callback_user_data; + godot_nir_callbacks.report_resource = _nir_report_resource; + godot_nir_callbacks.report_sc_bit_offset_fn = _nir_report_sc_bit_offset; + godot_nir_callbacks.report_bitcode_bit_offset_fn = _nir_report_bitcode_bit_offset; + + nir_to_dxil_options nir_to_dxil_options = {}; + nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; + nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(D3D_SHADER_MODEL(REQUIRED_SHADER_MODEL)); + nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION; + nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; + + dxil_logger logger = {}; + logger.log = [](void *p_priv, const char *p_msg) { +#ifdef DEBUG_ENABLED + print_verbose(p_msg); +#endif + }; + + blob dxil_blob = {}; + bool ok = nir_to_dxil(it.value, &nir_to_dxil_options, &logger, &dxil_blob); + ERR_FAIL_COND_V_MSG(!ok, false, "Shader translation at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed."); + + Vector blob_copy; + blob_copy.resize(dxil_blob.size); + memcpy(blob_copy.ptrw(), dxil_blob.data, dxil_blob.size); + blob_finish(&dxil_blob); + r_dxil_blobs.insert(stage, blob_copy); + } + + return true; +} + +bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(const Vector &p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed) { + r_dxil_blobs.clear(); + + HashMap stages_nir_shaders; + auto free_nir_shaders = [&]() { + for (KeyValue &E : stages_nir_shaders) { + ralloc_free(E.value); + } + stages_nir_shaders.clear(); + }; + + // This structure must live as long as the shaders are alive. + nir_shader_compiler_options compiler_options = *dxil_get_nir_compiler_options(); + compiler_options.lower_base_vertex = false; + + // This is based on spirv2dxil.c. May need updates when it changes. + // Also, this has to stay around until after linking. + if (!_convert_spirv_to_nir(p_spirv, &compiler_options, stages_nir_shaders, r_stages, r_stages_processed)) { + free_nir_shaders(); + return false; + } + + if (!_convert_nir_to_dxil(stages_nir_shaders, r_stages_processed, r_dxil_blobs)) { + free_nir_shaders(); + return false; + } + + free_nir_shaders(); + return true; +} + +bool RenderingShaderContainerD3D12::_generate_root_signature(BitField p_stages_processed) { + // Root (push) constants. + LocalVector root_params; + if (reflection_data_d3d12.dxil_push_constant_stages) { + CD3DX12_ROOT_PARAMETER1 push_constant; + push_constant.InitAsConstants( + reflection_data.push_constant_size / sizeof(uint32_t), + ROOT_CONSTANT_REGISTER, + 0, + stages_to_d3d12_visibility(reflection_data_d3d12.dxil_push_constant_stages)); + + root_params.push_back(push_constant); + } + + // NIR-DXIL runtime data. + if (reflection_data_d3d12.nir_runtime_data_root_param_idx == 1) { // Set above to 1 when discovering runtime data is needed. + DEV_ASSERT(!reflection_data.is_compute); // Could be supported if needed, but it's pointless as of now. 
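+		// _nir_report_resource() sets this field to 1 as a placeholder when it encounters the
+		// runtime-data CBV (RUNTIME_DATA_REGISTER); it is replaced with the real root parameter
+		// index here, and stays UINT32_MAX when no runtime data is needed at all.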
+ reflection_data_d3d12.nir_runtime_data_root_param_idx = root_params.size(); + CD3DX12_ROOT_PARAMETER1 nir_runtime_data; + nir_runtime_data.InitAsConstants( + sizeof(dxil_spirv_vertex_runtime_data) / sizeof(uint32_t), + RUNTIME_DATA_REGISTER, + 0, + D3D12_SHADER_VISIBILITY_VERTEX); + root_params.push_back(nir_runtime_data); + } + + // Descriptor tables (up to two per uniform set, for resources and/or samplers). + // These have to stay around until serialization! + struct TraceableDescriptorTable { + uint32_t stages_mask = {}; + Vector ranges; + Vector root_signature_locations; + }; + + uint32_t binding_start = 0; + Vector resource_tables_maps; + Vector sampler_tables_maps; + for (uint32_t i = 0; i < reflection_binding_set_uniforms_count.size(); i++) { + bool first_resource_in_set = true; + bool first_sampler_in_set = true; + uint32_t uniform_count = reflection_binding_set_uniforms_count[i]; + for (uint32_t j = 0; j < uniform_count; j++) { + const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j]; + ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j]; + bool really_used = uniform_d3d12.dxil_stages != 0; +#ifdef DEV_ENABLED + bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler; + DEV_ASSERT(anybody_home == really_used); +#endif + if (!really_used) { + continue; // Existed in SPIR-V; went away in DXIL. + } + + auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type, + uint32_t p_num_descriptors, + uint32_t p_dxil_register, + uint32_t p_dxil_stages_mask, + RootSignatureLocation *p_root_sig_locations, + Vector &r_tables, + bool &r_first_in_set) { + if (r_first_in_set) { + r_tables.resize(r_tables.size() + 1); + r_first_in_set = false; + } + + TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1]; + table.stages_mask |= p_dxil_stages_mask; + + CD3DX12_DESCRIPTOR_RANGE1 range; + // Due to the aliasing hack for SRV-UAV of different families, + // we can be causing an unintended change of data (sometimes the validation layers catch it). + D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; + if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_SRV || p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) { + flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; + } else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) { + flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; + } + range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags); + + table.ranges.push_back(range); + table.root_signature_locations.push_back(p_root_sig_locations); + }; + + uint32_t num_descriptors = 1; + D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {}; + switch ((ResourceClass)(uniform_d3d12.resource_class)) { + case RES_CLASS_INVALID: { + num_descriptors = uniform.length; + DEV_ASSERT(uniform_d3d12.has_sampler); + } break; + case RES_CLASS_CBV: { + resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + DEV_ASSERT(!uniform_d3d12.has_sampler); + } break; + case RES_CLASS_SRV: { + resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size. + } break; + case RES_CLASS_UAV: { + resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size. 
+ DEV_ASSERT(!uniform_d3d12.has_sampler); + } break; + } + + uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER; + if (uniform_d3d12.resource_class != RES_CLASS_INVALID) { + insert_range( + resource_range_type, + num_descriptors, + dxil_register, + uniform_d3d12.dxil_stages, + &uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE], + resource_tables_maps, + first_resource_in_set); + } + + if (uniform_d3d12.has_sampler) { + insert_range( + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, + num_descriptors, + dxil_register, + uniform_d3d12.dxil_stages, + &uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER], + sampler_tables_maps, + first_sampler_in_set); + } + } + + binding_start += uniform_count; + } + + auto make_descriptor_tables = [&root_params](const Vector &p_tables) { + for (const TraceableDescriptorTable &table : p_tables) { + D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask); + DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size()); + for (int i = 0; i < table.ranges.size(); i++) { + // By now we know very well which root signature location corresponds to the pointed uniform. + table.root_signature_locations[i]->root_param_index = root_params.size(); + table.root_signature_locations[i]->range_index = i; + } + + CD3DX12_ROOT_PARAMETER1 root_table; + root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility); + root_params.push_back(root_table); + } + }; + + make_descriptor_tables(resource_tables_maps); + make_descriptor_tables(sampler_tables_maps); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {}; + D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags = + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; + + if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT)) { + root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS; + } + + if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_FRAGMENT_BIT)) { + root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; + } + + if (reflection_data.vertex_input_mask) { + root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + } + + root_sig_desc.Init_1_1(root_params.size(), root_params.ptr(), 0, nullptr, root_sig_flags); + + // Create and store the root signature and its CRC32. 
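+	// Both are exposed through get_shader_reflection_d3d12() (root_signature_bytes / root_signature_crc),
+	// so the driver can create the ID3D12RootSignature from the already serialized blob.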
+	ID3DBlob *error_blob = nullptr;
+	ID3DBlob *root_sig_blob = nullptr;
+	HRESULT res = D3DX12SerializeVersionedRootSignature(HMODULE(lib_d3d12), &root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, &root_sig_blob, &error_blob);
+	if (SUCCEEDED(res)) {
+		root_signature_bytes.resize(root_sig_blob->GetBufferSize());
+		memcpy(root_signature_bytes.ptrw(), root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize());
+
+		root_signature_crc = crc32(0, nullptr, 0);
+		root_signature_crc = crc32(root_signature_crc, (const Bytef *)root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize());
+
+		return true;
+	} else {
+		if (root_sig_blob != nullptr) {
+			root_sig_blob->Release();
+		}
+
+		String error_string;
+		if (error_blob != nullptr) {
+			error_string = vformat("Serialization of root signature failed with error 0x%08x and the following message:\n%s", uint32_t(res), String::ascii(Span((char *)error_blob->GetBufferPointer(), error_blob->GetBufferSize())));
+			error_blob->Release();
+		} else {
+			error_string = vformat("Serialization of root signature failed with error 0x%08x", uint32_t(res));
+		}
+
+		ERR_FAIL_V_MSG(false, error_string);
+	}
+}
+
+void RenderingShaderContainerD3D12::_nir_report_resource(uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data) {
+	const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data;
+
+	// Types based on Mesa's dxil_container.h.
+	static const uint32_t DXIL_RES_SAMPLER = 1;
+	static const ResourceClass DXIL_TYPE_TO_CLASS[] = {
+		RES_CLASS_INVALID, // DXIL_RES_INVALID
+		RES_CLASS_INVALID, // DXIL_RES_SAMPLER
+		RES_CLASS_CBV, // DXIL_RES_CBV
+		RES_CLASS_SRV, // DXIL_RES_SRV_TYPED
+		RES_CLASS_SRV, // DXIL_RES_SRV_RAW
+		RES_CLASS_SRV, // DXIL_RES_SRV_STRUCTURED
+		RES_CLASS_UAV, // DXIL_RES_UAV_TYPED
+		RES_CLASS_UAV, // DXIL_RES_UAV_RAW
+		RES_CLASS_UAV, // DXIL_RES_UAV_STRUCTURED
+		RES_CLASS_INVALID, // DXIL_RES_UAV_STRUCTURED_WITH_COUNTER
+	};
+
+	DEV_ASSERT(p_dxil_type < ARRAY_SIZE(DXIL_TYPE_TO_CLASS));
+	ResourceClass resource_class = DXIL_TYPE_TO_CLASS[p_dxil_type];
+
+	if (p_register == ROOT_CONSTANT_REGISTER && p_space == 0) {
+		DEV_ASSERT(resource_class == RES_CLASS_CBV);
+		user_data.container->reflection_data_d3d12.dxil_push_constant_stages |= (1 << user_data.stage);
+	} else if (p_register == RUNTIME_DATA_REGISTER && p_space == 0) {
+		DEV_ASSERT(resource_class == RES_CLASS_CBV);
+		user_data.container->reflection_data_d3d12.nir_runtime_data_root_param_idx = 1; // Temporary, to be determined later.
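+		// The definitive root parameter index is assigned in _generate_root_signature().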
+ } else { + DEV_ASSERT(p_space == 0); + + uint32_t set = p_register / GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER; + uint32_t binding = (p_register % GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER) / GODOT_NIR_BINDING_MULTIPLIER; + + DEV_ASSERT(set < (uint32_t)user_data.container->reflection_binding_set_uniforms_count.size()); + + uint32_t binding_start = 0; + for (uint32_t i = 0; i < set; i++) { + binding_start += user_data.container->reflection_binding_set_uniforms_count[i]; + } + + [[maybe_unused]] bool found = false; + for (uint32_t i = 0; i < user_data.container->reflection_binding_set_uniforms_count[set]; i++) { + const ReflectionBindingData &uniform = user_data.container->reflection_binding_set_uniforms_data[binding_start + i]; + ReflectionBindingDataD3D12 &uniform_d3d12 = user_data.container->reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + i]; + if (uniform.binding != binding) { + continue; + } + + uniform_d3d12.dxil_stages |= (1 << user_data.stage); + if (resource_class != RES_CLASS_INVALID) { + DEV_ASSERT(uniform_d3d12.resource_class == (uint32_t)RES_CLASS_INVALID || uniform_d3d12.resource_class == (uint32_t)resource_class); + uniform_d3d12.resource_class = resource_class; + } else if (p_dxil_type == DXIL_RES_SAMPLER) { + uniform_d3d12.has_sampler = (uint32_t)true; + } else { + DEV_ASSERT(false && "Unknown resource class."); + } + found = true; + } + + DEV_ASSERT(found); + } +} + +void RenderingShaderContainerD3D12::_nir_report_sc_bit_offset(uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data) { + const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data; + [[maybe_unused]] bool found = false; + for (int64_t i = 0; i < user_data.container->reflection_specialization_data.size(); i++) { + const ReflectionSpecializationData &sc = user_data.container->reflection_specialization_data[i]; + ReflectionSpecializationDataD3D12 &sc_d3d12 = user_data.container->reflection_specialization_data_d3d12.ptrw()[i]; + if (sc.constant_id != p_sc_id) { + continue; + } + + uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[user_data.stage]; + DEV_ASSERT(sc_d3d12.stages_bit_offsets[offset_idx] == 0); + sc_d3d12.stages_bit_offsets[offset_idx] = p_bit_offset; + found = true; + break; + } + + DEV_ASSERT(found); +} + +void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bit_offset, void *p_data) { + DEV_ASSERT(p_bit_offset % 8 == 0); + + const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data; + uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[user_data.stage]; + for (int64_t i = 0; i < user_data.container->reflection_specialization_data.size(); i++) { + ReflectionSpecializationDataD3D12 &sc_d3d12 = user_data.container->reflection_specialization_data_d3d12.ptrw()[i]; + if (sc_d3d12.stages_bit_offsets[offset_idx] == 0) { + // This SC has been optimized out from this stage. + continue; + } + + sc_d3d12.stages_bit_offsets[offset_idx] += p_bit_offset; + } +} +#endif + +void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) { + reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size()); + reflection_specialization_data_d3d12.resize(reflection_specialization_data.size()); + + // Sort bindings inside each uniform set. This guarantees the root signature will be generated in the correct order. 
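+	// (The sort presumably orders uniforms by binding index, so the descriptor ranges of each set are
+	// emitted in a stable, ascending register order by _generate_root_signature().)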
+	SortArray sorter;
+	uint32_t binding_start = 0;
+	for (uint32_t i = 0; i < reflection_binding_set_uniforms_count.size(); i++) {
+		uint32_t uniform_count = reflection_binding_set_uniforms_count[i];
+		if (uniform_count > 0) {
+			sorter.sort(&reflection_binding_set_uniforms_data.ptrw()[binding_start], uniform_count);
+			binding_start += uniform_count;
+		}
+	}
+}
+
+bool RenderingShaderContainerD3D12::_set_code_from_spirv(const Vector &p_spirv) {
+#if NIR_ENABLED
+	reflection_data_d3d12.nir_runtime_data_root_param_idx = UINT32_MAX;
+
+	for (int64_t i = 0; i < reflection_specialization_data.size(); i++) {
+		DEV_ASSERT(reflection_specialization_data[i].constant_id < (sizeof(reflection_data_d3d12.spirv_specialization_constants_ids_mask) * 8) && "Constant IDs with values above 31 are not supported.");
+		reflection_data_d3d12.spirv_specialization_constants_ids_mask |= (1 << reflection_specialization_data[i].constant_id);
+	}
+
+	// Translate SPIR-V shaders to DXIL, and collect shader info from the new representation.
+	HashMap> dxil_blobs;
+	Vector stages;
+	BitField stages_processed = {};
+	if (!_convert_spirv_to_dxil(p_spirv, dxil_blobs, stages, stages_processed)) {
+		return false;
+	}
+
+	// Patch with default values of specialization constants.
+	DEV_ASSERT(reflection_specialization_data.size() == reflection_specialization_data_d3d12.size());
+	for (int32_t i = 0; i < reflection_specialization_data.size(); i++) {
+		const ReflectionSpecializationData &sc = reflection_specialization_data[i];
+		const ReflectionSpecializationDataD3D12 &sc_d3d12 = reflection_specialization_data_d3d12[i];
+		RenderingDXIL::patch_specialization_constant((RenderingDeviceCommons::PipelineSpecializationConstantType)(sc.type), &sc.int_value, sc_d3d12.stages_bit_offsets, dxil_blobs, true);
+	}
+
+	// Sign.
+	for (KeyValue> &E : dxil_blobs) {
+		RenderingDXIL::sign_bytecode(E.key, E.value);
+	}
+
+	// Store compressed DXIL blobs as the shaders.
+	shaders.resize(p_spirv.size());
+	for (int64_t i = 0; i < shaders.size(); i++) {
+		const PackedByteArray &dxil_bytes = dxil_blobs[stages[i]];
+		RenderingShaderContainer::Shader &shader = shaders.ptrw()[i];
+		uint32_t compressed_size = 0;
+		shader.shader_stage = stages[i];
+		shader.code_decompressed_size = dxil_bytes.size();
+		shader.code_compressed_bytes.resize(dxil_bytes.size());
+
+		bool compressed = compress_code(dxil_bytes.ptr(), dxil_bytes.size(), shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags);
+		ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code for SPIR-V #%d.", i));
+
+		shader.code_compressed_bytes.resize(compressed_size);
+	}
+
+	if (!_generate_root_signature(stages_processed)) {
+		return false;
+	}
+
+	return true;
+#else
+	ERR_FAIL_V_MSG(false, "Shader compilation is not supported at runtime without NIR.");
+#endif
+}
+
+RenderingShaderContainerD3D12::RenderingShaderContainerD3D12() {
+	// Default empty constructor.
+} + +RenderingShaderContainerD3D12::RenderingShaderContainerD3D12(void *p_lib_d3d12) { + lib_d3d12 = p_lib_d3d12; +} + +RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D12::get_shader_reflection_d3d12() const { + ShaderReflectionD3D12 reflection; + reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask; + reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages; + reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx; + reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12; + reflection.root_signature_bytes = root_signature_bytes; + reflection.root_signature_crc = root_signature_crc; + + // Transform data vector into a vector of vectors that's easier to user. + uint32_t uniform_index = 0; + reflection.reflection_binding_set_uniforms_d3d12.resize(reflection_binding_set_uniforms_count.size()); + for (int64_t i = 0; i < reflection.reflection_binding_set_uniforms_d3d12.size(); i++) { + Vector &uniforms = reflection.reflection_binding_set_uniforms_d3d12.ptrw()[i]; + uniforms.resize(reflection_binding_set_uniforms_count[i]); + for (int64_t j = 0; j < uniforms.size(); j++) { + uniforms.ptrw()[j] = reflection_binding_set_uniforms_data_d3d12[uniform_index]; + uniform_index++; + } + } + + return reflection; +} + +// RenderingShaderContainerFormatD3D12 + +void RenderingShaderContainerFormatD3D12::set_lib_d3d12(void *p_lib_d3d12) { + lib_d3d12 = p_lib_d3d12; +} + +Ref RenderingShaderContainerFormatD3D12::create_container() const { + return memnew(RenderingShaderContainerD3D12(lib_d3d12)); +} + +RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatD3D12::get_shader_language_version() const { + // NIR-DXIL is Vulkan 1.1-conformant. + return SHADER_LANGUAGE_VULKAN_VERSION_1_1; +} + +RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatD3D12::get_shader_spirv_version() const { + // The SPIR-V part of Mesa supports 1.6, but: + // - SPIRV-Reflect won't be able to parse the compute workgroup size. + // - We want to play it safe with NIR-DXIL. + return SHADER_SPIRV_VERSION_1_5; +} + +RenderingShaderContainerFormatD3D12::RenderingShaderContainerFormatD3D12() {} + +RenderingShaderContainerFormatD3D12::~RenderingShaderContainerFormatD3D12() {} diff --git a/drivers/d3d12/rendering_shader_container_d3d12.h b/drivers/d3d12/rendering_shader_container_d3d12.h new file mode 100644 index 0000000000..63eac8def1 --- /dev/null +++ b/drivers/d3d12/rendering_shader_container_d3d12.h @@ -0,0 +1,181 @@ +/**************************************************************************/ +/* rendering_shader_container_d3d12.h */ +/**************************************************************************/ +/* This file is part of: */ +/* REDOT ENGINE */ +/* https://redotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/rendering_shader_container.h" + +#define NIR_ENABLED 1 + +#ifdef SHADER_BAKER_RUNTIME_ENABLED +#undef NIR_ENABLED +#endif + +#include "d3d12_godot_nir_bridge.h" + +#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 + +#if NIR_ENABLED +struct nir_shader; +struct nir_shader_compiler_options; +#endif + +enum RootSignatureLocationType { + RS_LOC_TYPE_RESOURCE, + RS_LOC_TYPE_SAMPLER, +}; + +enum ResourceClass { + RES_CLASS_INVALID, + RES_CLASS_CBV, + RES_CLASS_SRV, + RES_CLASS_UAV, +}; + +struct RenderingDXIL { + static uint32_t patch_specialization_constant( + RenderingDeviceCommons::PipelineSpecializationConstantType p_type, + const void *p_value, + const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], + HashMap> &r_stages_bytecodes, + bool p_is_first_patch); + + static void sign_bytecode(RenderingDeviceCommons::ShaderStage p_stage, Vector &r_dxil_blob); +}; + +class RenderingShaderContainerD3D12 : public RenderingShaderContainer { + GDSOFTCLASS(RenderingShaderContainerD3D12, RenderingShaderContainer); + +public: + static constexpr uint32_t REQUIRED_SHADER_MODEL = 0x62; // D3D_SHADER_MODEL_6_2 + static constexpr uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RenderingDeviceCommons::MAX_UNIFORM_SETS + 1); + static constexpr uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RenderingDeviceCommons::MAX_UNIFORM_SETS + 2); + static constexpr uint32_t FORMAT_VERSION = 1; + static constexpr uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDeviceCommons::SHADER_STAGE_MAX] = { + 0, // SHADER_STAGE_VERTEX + 1, // SHADER_STAGE_FRAGMENT + UINT32_MAX, // SHADER_STAGE_TESSELATION_CONTROL + UINT32_MAX, // SHADER_STAGE_TESSELATION_EVALUATION + 2, // SHADER_STAGE_COMPUTE + }; + + struct RootSignatureLocation { + uint32_t root_param_index = UINT32_MAX; + uint32_t range_index = UINT32_MAX; + }; + + struct ReflectionBindingDataD3D12 { + uint32_t resource_class = 0; + uint32_t has_sampler = 0; + uint32_t dxil_stages = 0; + RootSignatureLocation root_signature_locations[2]; + }; + + struct ReflectionSpecializationDataD3D12 { + uint64_t stages_bit_offsets[D3D12_BITCODE_OFFSETS_NUM_STAGES] = {}; + }; + +protected: + struct ReflectionDataD3D12 { + uint32_t spirv_specialization_constants_ids_mask = 
0; + uint32_t dxil_push_constant_stages = 0; + uint32_t nir_runtime_data_root_param_idx = 0; + }; + + struct ContainerFooterD3D12 { + uint32_t root_signature_length = 0; + uint32_t root_signature_crc = 0; + }; + + void *lib_d3d12 = nullptr; + ReflectionDataD3D12 reflection_data_d3d12; + Vector reflection_binding_set_uniforms_data_d3d12; + Vector reflection_specialization_data_d3d12; + Vector root_signature_bytes; + uint32_t root_signature_crc = 0; + +#if NIR_ENABLED + bool _convert_spirv_to_nir(const Vector &p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed); + bool _convert_nir_to_dxil(const HashMap &p_stages_nir_shaders, BitField p_stages_processed, HashMap> &r_dxil_blobs); + bool _convert_spirv_to_dxil(const Vector &p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed); + bool _generate_root_signature(BitField p_stages_processed); + + // GodotNirCallbacks. + static void _nir_report_resource(uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data); + static void _nir_report_sc_bit_offset(uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data); + static void _nir_report_bitcode_bit_offset(uint64_t p_bit_offset, void *p_data); +#endif + + // RenderingShaderContainer overrides. + virtual uint32_t _format() const override; + virtual uint32_t _format_version() const override; + virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + virtual uint32_t _from_bytes_footer_extra_data(const uint8_t *p_bytes) override; + virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override; + virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const override; + virtual void _set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) override; + virtual bool _set_code_from_spirv(const Vector &p_spirv) override; + +public: + struct ShaderReflectionD3D12 { + uint32_t spirv_specialization_constants_ids_mask = 0; + uint32_t dxil_push_constant_stages = 0; + uint32_t nir_runtime_data_root_param_idx = 0; + Vector> reflection_binding_set_uniforms_d3d12; + Vector reflection_specialization_data_d3d12; + Vector root_signature_bytes; + uint32_t root_signature_crc = 0; + }; + + RenderingShaderContainerD3D12(); + RenderingShaderContainerD3D12(void *p_lib_d3d12); + ShaderReflectionD3D12 get_shader_reflection_d3d12() const; +}; + +class RenderingShaderContainerFormatD3D12 : public RenderingShaderContainerFormat { +protected: + void *lib_d3d12 = nullptr; + +public: + void set_lib_d3d12(void *p_lib_d3d12); + virtual Ref create_container() const override; + virtual ShaderLanguageVersion get_shader_language_version() const override; + virtual ShaderSpirvVersion get_shader_spirv_version() 
const override; + RenderingShaderContainerFormatD3D12(); + virtual ~RenderingShaderContainerFormatD3D12(); +}; diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp index ad86579795..0f635193d9 100644 --- a/drivers/gles3/rasterizer_gles3.cpp +++ b/drivers/gles3/rasterizer_gles3.cpp @@ -439,6 +439,9 @@ void RasterizerGLES3::_blit_render_target_to_screen(RID p_render_target, Display glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, rt->color); + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, GL_ZERO); + if (rt->view_count > 1) { copy_effects->copy_to_rect_3d(screenrect, p_layer, GLES3::Texture::TYPE_LAYERED); } else { diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 679433c39d..3aa8dcfeeb 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -131,12 +131,6 @@ void ShaderGLES3::_setup(const char *p_vertex_code, const char *p_fragment_code, feedback_count = p_feedback_count; StringBuilder tohash; - /* - tohash.append("[SpirvCacheKey]"); - tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key()); - tohash.append("[BinaryCacheKey]"); - tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key()); - */ tohash.append("[Vertex]"); tohash.append(p_vertex_code ? p_vertex_code : ""); tohash.append("[Fragment]"); diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index a348d0d493..fcd544b925 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -2166,7 +2166,7 @@ RID MaterialStorage::shader_allocate() { return shader_owner.allocate_rid(); } -void MaterialStorage::shader_initialize(RID p_rid) { +void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) { Shader shader; shader.data = nullptr; shader.mode = RS::SHADER_MAX; diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h index 6b1e409c3f..d7813dec27 100644 --- a/drivers/gles3/storage/material_storage.h +++ b/drivers/gles3/storage/material_storage.h @@ -484,6 +484,7 @@ private: mutable RID_Owner material_owner; SelfList::List material_update_list; + HashSet dummy_embedded_set; public: static MaterialStorage *get_singleton(); @@ -576,7 +577,7 @@ public: void _shader_make_dirty(Shader *p_shader); virtual RID shader_allocate() override; - virtual void shader_initialize(RID p_rid) override; + virtual void shader_initialize(RID p_rid, bool p_embedded = true) override; virtual void shader_free(RID p_rid) override; virtual void shader_set_code(RID p_shader, const String &p_code) override; @@ -589,6 +590,9 @@ public: virtual Variant shader_get_parameter_default(RID p_shader, const StringName &p_name) const override; virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override; + virtual void shader_embedded_set_lock() override {} + virtual const HashSet &shader_embedded_set_get() const override { return dummy_embedded_set; } + virtual void shader_embedded_set_unlock() override {} /* MATERIAL API */ diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index c5a7876fac..d62a96b70d 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -229,7 +229,7 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) if (!(new_surface.format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) && (new_surface.format & RS::ARRAY_FORMAT_NORMAL) && 
!(new_surface.format & RS::ARRAY_FORMAT_TANGENT)) { // Unfortunately, we need to copy the buffer, which is fine as doing a resize triggers a CoW anyway. Vector new_vertex_data; - new_vertex_data.resize_zeroed(new_surface.vertex_data.size() + sizeof(uint16_t) * 2); + new_vertex_data.resize_initialized(new_surface.vertex_data.size() + sizeof(uint16_t) * 2); memcpy(new_vertex_data.ptrw(), new_surface.vertex_data.ptr(), new_surface.vertex_data.size()); GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->vertex_buffer, new_vertex_data.size(), new_vertex_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh vertex buffer"); s->vertex_buffer_size = new_vertex_data.size(); diff --git a/drivers/gles3/storage/particles_storage.cpp b/drivers/gles3/storage/particles_storage.cpp index d71ed695b0..1c97d10b82 100644 --- a/drivers/gles3/storage/particles_storage.cpp +++ b/drivers/gles3/storage/particles_storage.cpp @@ -870,10 +870,10 @@ void ParticlesStorage::_particles_update_buffers(Particles *particles) { particles->process_buffer_stride_cache = sizeof(float) * 4 * particles->num_attrib_arrays_cache; PackedByteArray data; - data.resize_zeroed(particles->process_buffer_stride_cache * total_amount); + data.resize_initialized(particles->process_buffer_stride_cache * total_amount); PackedByteArray instance_data; - instance_data.resize_zeroed(particles->instance_buffer_size_cache); + instance_data.resize_initialized(particles->instance_buffer_size_cache); { glGenVertexArrays(1, &particles->front_vertex_array); diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index d00732c96e..74c74879b9 100644 --- a/drivers/metal/metal_device_properties.h +++ b/drivers/metal/metal_device_properties.h @@ -72,7 +72,8 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) { }; struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { - uint32_t mslVersion = 0; + uint32_t mslVersionMajor = 0; + uint32_t mslVersionMinor = 0; MTLGPUFamily highestFamily = MTLGPUFamilyApple4; bool supportsBCTextureCompression = false; bool supportsDepth24Stencil8 = false; diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm index 7892cbd166..1c78bd8533 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.mm @@ -139,51 +139,8 @@ void MetalDeviceProperties::init_features(id p_device) { MTLCompileOptions *opts = [MTLCompileOptions new]; features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version. 
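+	// MTLLanguageVersion packs the version as (major << 16) | minor (e.g. MTLLanguageVersion3_1 == 0x30001),
+	// which is unpacked into mslVersionMajor / mslVersionMinor below.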
- -#define setMSLVersion(m_maj, m_min) \ - features.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(m_maj, m_min) - - switch (features.mslVersionEnum) { -#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000 || __TV_OS_VERSION_MAX_ALLOWED >= 180000 || __VISION_OS_VERSION_MAX_ALLOWED >= 20000 - case MTLLanguageVersion3_2: - setMSLVersion(3, 2); - break; -#endif -#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 140000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000 || __TV_OS_VERSION_MAX_ALLOWED >= 170000 - case MTLLanguageVersion3_1: - setMSLVersion(3, 1); - break; -#endif - case MTLLanguageVersion3_0: - setMSLVersion(3, 0); - break; - case MTLLanguageVersion2_4: - setMSLVersion(2, 4); - break; - case MTLLanguageVersion2_3: - setMSLVersion(2, 3); - break; - case MTLLanguageVersion2_2: - setMSLVersion(2, 2); - break; - case MTLLanguageVersion2_1: - setMSLVersion(2, 1); - break; - case MTLLanguageVersion2_0: - setMSLVersion(2, 0); - break; - case MTLLanguageVersion1_2: - setMSLVersion(1, 2); - break; - case MTLLanguageVersion1_1: - setMSLVersion(1, 1); - break; -#if TARGET_OS_IPHONE && !TARGET_OS_MACCATALYST && !TARGET_OS_VISION - case MTLLanguageVersion1_0: - setMSLVersion(1, 0); - break; -#endif - } + features.mslVersionMajor = (opts.languageVersion >> 0x10) & 0xff; + features.mslVersionMinor = (opts.languageVersion >> 0x00) & 0xff; } void MetalDeviceProperties::init_limits(id p_device) { diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index 9018277840..f6832a8cbb 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -55,6 +55,7 @@ #import "metal_device_properties.h" #import "metal_utils.h" #import "pixel_formats.h" +#import "sha256_digest.h" #include "servers/rendering/rendering_device_driver.h" @@ -84,9 +85,6 @@ MTL_CLASS(Texture) } //namespace MTL -/// Metal buffer index for the view mask when rendering multi-view. -const uint32_t VIEW_MASK_BUFFER_INDEX = 24; - enum ShaderStageUsage : uint32_t { None = 0, Vertex = RDD::SHADER_STAGE_VERTEX_BIT, @@ -576,34 +574,6 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo { desc.arrayLength = arrayLength; return desc; } - - size_t serialize_size() const { - return sizeof(uint32_t) * 8 /* 8 uint32_t fields */; - } - - template - void serialize(W &p_writer) const { - p_writer.write((uint32_t)dataType); - p_writer.write(index); - p_writer.write((uint32_t)access); - p_writer.write((uint32_t)usage); - p_writer.write((uint32_t)textureType); - p_writer.write(imageFormat); - p_writer.write(arrayLength); - p_writer.write(isMultisampled); - } - - template - void deserialize(R &p_reader) { - p_reader.read((uint32_t &)dataType); - p_reader.read(index); - p_reader.read((uint32_t &)access); - p_reader.read((uint32_t &)usage); - p_reader.read((uint32_t &)textureType); - p_reader.read((uint32_t &)imageFormat); - p_reader.read(arrayLength); - p_reader.read(isMultisampled); - } }; using RDC = RenderingDeviceCommons; @@ -637,39 +607,29 @@ enum class ShaderLoadStrategy { /// A Metal shader library. 
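+/// It wraps an MTLLibrary that is either compiled from MSL source according to the requested
+/// ShaderLoadStrategy or, via the new data-based factory method below, loaded from a pre-serialized
+/// binary library (see MDBinaryLibrary).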
@interface MDLibrary : NSObject { ShaderCacheEntry *_entry; + NSString *_original_source; }; - (id)library; - (NSError *)error; - (void)setLabel:(NSString *)label; +#ifdef DEV_ENABLED +- (NSString *)originalSource; +#endif + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry device:(id)device source:(NSString *)source options:(MTLCompileOptions *)options strategy:(ShaderLoadStrategy)strategy; + ++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data; @end -struct SHA256Digest { - unsigned char data[CC_SHA256_DIGEST_LENGTH]; - - uint32_t hash() const { - uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH); - return c; - } - - SHA256Digest() { - bzero(data, CC_SHA256_DIGEST_LENGTH); - } - - SHA256Digest(const char *p_data, size_t p_length) { - CC_SHA256(p_data, (CC_LONG)p_length, data); - } - - _FORCE_INLINE_ uint32_t short_sha() const { - return __builtin_bswap32(*(uint32_t *)&data[0]); - } -}; - template <> struct HashMapComparatorDefault { static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) { @@ -719,9 +679,6 @@ public: MTLSize local = {}; MDLibrary *kernel; -#if DEV_ENABLED - CharString kernel_source; -#endif void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; @@ -744,10 +701,6 @@ public: MDLibrary *vert; MDLibrary *frag; -#if DEV_ENABLED - CharString vert_source; - CharString frag_source; -#endif void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index 79b17cbc00..b7706ae952 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -55,6 +55,7 @@ #import "metal_utils.h" #import "pixel_formats.h" #import "rendering_device_driver_metal.h" +#import "rendering_shader_container_metal.h" #import @@ -1943,7 +1944,11 @@ void ShaderCacheEntry::notify_free() const { } @interface MDLibrary () -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry; +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry +#ifdef DEV_ENABLED + source:(NSString *)source; +#endif +; @end /// Loads the MTLLibrary when the library is first accessed. 
@@ -1977,6 +1982,18 @@ void ShaderCacheEntry::notify_free() const { options:(MTLCompileOptions *)options; @end +@interface MDBinaryLibrary : MDLibrary { + id _library; + NSError *_error; +} +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data; +@end + @implementation MDLibrary + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry @@ -1994,6 +2011,26 @@ void ShaderCacheEntry::notify_free() const { } } ++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data { + return [[MDBinaryLibrary alloc] initWithCacheEntry:entry + device:device +#ifdef DEV_ENABLED + source:source +#endif + data:data]; +} + +#ifdef DEV_ENABLED +- (NSString *)originalSource { + return _original_source; +} +#endif + - (id)library { CRASH_NOW_MSG("Not implemented"); return nil; @@ -2007,10 +2044,17 @@ void ShaderCacheEntry::notify_free() const { - (void)setLabel:(NSString *)label { } -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry { +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry +#ifdef DEV_ENABLED + source:(NSString *)source +#endif +{ self = [super init]; _entry = entry; _entry->library = self; +#ifdef DEV_ENABLED + _original_source = source; +#endif return self; } @@ -2026,7 +2070,11 @@ void ShaderCacheEntry::notify_free() const { device:(id)device source:(NSString *)source options:(MTLCompileOptions *)options { - self = [super initWithCacheEntry:entry]; + self = [super initWithCacheEntry:entry +#ifdef DEV_ENABLED + source:source +#endif + ]; _complete = false; _ready = false; @@ -2078,7 +2126,11 @@ void ShaderCacheEntry::notify_free() const { device:(id)device source:(NSString *)source options:(MTLCompileOptions *)options { - self = [super initWithCacheEntry:entry]; + self = [super initWithCacheEntry:entry +#ifdef DEV_ENABLED + source:source +#endif + ]; _device = device; _source = source; _options = options; @@ -2123,3 +2175,36 @@ void ShaderCacheEntry::notify_free() const { } @end + +@implementation MDBinaryLibrary + +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data { + self = [super initWithCacheEntry:entry +#ifdef DEV_ENABLED + source:source +#endif + ]; + NSError *error = nil; + _library = [device newLibraryWithData:data error:&error]; + if (error != nil) { + _error = error; + NSString *desc = [error description]; + ERR_PRINT(vformat("Unable to load shader library: %s", desc.UTF8String)); + } + return self; +} + +- (id)library { + return _library; +} + +- (NSError *)error { + return _error; +} + +@end diff --git a/drivers/metal/rendering_context_driver_metal.h b/drivers/metal/rendering_context_driver_metal.h index 9a85697d80..f246c7efbb 100644 --- a/drivers/metal/rendering_context_driver_metal.h +++ b/drivers/metal/rendering_context_driver_metal.h @@ -58,6 +58,8 @@ class PixelFormats; class MDResourceCache; class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMetal : public RenderingContextDriver { + bool capture_available = false; + protected: #ifdef __OBJC__ id metal_device = nullptr; @@ -82,7 +84,7 @@ public: void surface_set_needs_resize(SurfaceID p_surface, bool p_needs_resize) final override; bool surface_get_needs_resize(SurfaceID p_surface) const final override; void surface_destroy(SurfaceID 
p_surface) final override; - bool is_debug_utils_enabled() const final override { return true; } + bool is_debug_utils_enabled() const final override { return capture_available; } #pragma mark - Metal-specific methods diff --git a/drivers/metal/rendering_context_driver_metal.mm b/drivers/metal/rendering_context_driver_metal.mm index 2816b45ead..b8c2ac61cf 100644 --- a/drivers/metal/rendering_context_driver_metal.mm +++ b/drivers/metal/rendering_context_driver_metal.mm @@ -47,6 +47,10 @@ RenderingContextDriverMetal::~RenderingContextDriverMetal() { } Error RenderingContextDriverMetal::initialize() { + if (OS::get_singleton()->get_environment(U"METAL_DEVICE_WRAPPER_TYPE") == "1") { + capture_available = true; + } + metal_device = MTLCreateSystemDefaultDevice(); #if TARGET_OS_OSX if (@available(macOS 13.3, *)) { diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 196b8b14b4..8bc7a7819a 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -33,6 +33,7 @@ #pragma once #import "metal_objects.h" +#import "rendering_shader_container_metal.h" #include "servers/rendering/rendering_device_driver.h" @@ -59,9 +60,9 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet RenderingContextDriver::Device context_device; id device = nil; - uint32_t version_major = 2; - uint32_t version_minor = 0; MetalDeviceProperties *device_properties = nullptr; + MetalDeviceProfile device_profile; + RenderingShaderContainerFormatMetal *shader_container_format = nullptr; PixelFormats *pixel_formats = nullptr; std::unique_ptr resource_cache; @@ -79,7 +80,7 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet String pipeline_cache_id; Error _create_device(); - Error _check_capabilities(); + void _check_capabilities(); #pragma mark - Shader Cache @@ -243,21 +244,11 @@ private: friend struct ShaderBinaryData; friend struct PushConstantData; -private: - /// Contains additional metadata about the shader. - struct ShaderMeta { - /// Indicates whether the shader uses multiview. 
- bool has_multiview = false; - }; - - Error _reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta); - public: - virtual String shader_get_binary_cache_key() override final; - virtual Vector shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) override final; - virtual ShaderID shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) override final; + virtual ShaderID shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) override final; virtual void shader_free(ShaderID p_shader) override final; virtual void shader_destroy_modules(ShaderID p_shader) override final; + virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final; #pragma mark - Uniform Set diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 760051bb16..65fd14b973 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -54,6 +54,7 @@ #import "pixel_formats.h" #import "rendering_context_driver_metal.h" +#import "rendering_shader_container_metal.h" #include "core/io/compression.h" #include "core/io/marshalls.h" @@ -1095,1349 +1096,6 @@ void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) { #pragma mark - Shader -const uint32_t SHADER_BINARY_VERSION = 4; - -// region Serialization - -class BufWriter; - -template -concept Serializable = requires(T t, BufWriter &p_writer) { - { - t.serialize_size() - } -> std::same_as; - { - t.serialize(p_writer) - } -> std::same_as; -}; - -class BufWriter { - uint8_t *data = nullptr; - uint64_t length = 0; // Length of data. - uint64_t pos = 0; - -public: - BufWriter(uint8_t *p_data, uint64_t p_length) : - data(p_data), length(p_length) {} - - template - void write(T const &p_value) { - p_value.serialize(*this); - } - - _FORCE_INLINE_ void write(uint32_t p_value) { - DEV_ASSERT(pos + sizeof(uint32_t) <= length); - pos += encode_uint32(p_value, data + pos); - } - - _FORCE_INLINE_ void write(RD::ShaderStage p_value) { - write((uint32_t)p_value); - } - - _FORCE_INLINE_ void write(bool p_value) { - DEV_ASSERT(pos + sizeof(uint8_t) <= length); - *(data + pos) = p_value ? 1 : 0; - pos += 1; - } - - _FORCE_INLINE_ void write(int p_value) { - write((uint32_t)p_value); - } - - _FORCE_INLINE_ void write(uint64_t p_value) { - DEV_ASSERT(pos + sizeof(uint64_t) <= length); - pos += encode_uint64(p_value, data + pos); - } - - _FORCE_INLINE_ void write(float p_value) { - DEV_ASSERT(pos + sizeof(float) <= length); - pos += encode_float(p_value, data + pos); - } - - _FORCE_INLINE_ void write(double p_value) { - DEV_ASSERT(pos + sizeof(double) <= length); - pos += encode_double(p_value, data + pos); - } - - void write_compressed(CharString const &p_string) { - write(p_string.length()); // Uncompressed size. - - DEV_ASSERT(pos + sizeof(uint32_t) + Compression::get_max_compressed_buffer_size(p_string.length(), Compression::MODE_ZSTD) <= length); - - // Save pointer for compressed size. - uint8_t *dst_size_ptr = data + pos; // Compressed size. 
- pos += sizeof(uint32_t); - - int dst_size = Compression::compress(data + pos, reinterpret_cast(p_string.ptr()), p_string.length(), Compression::MODE_ZSTD); - encode_uint32(dst_size, dst_size_ptr); - pos += dst_size; - } - - void write(CharString const &p_string) { - write_buffer(reinterpret_cast(p_string.ptr()), p_string.length()); - } - - template - void write(VectorView p_vector) { - write(p_vector.size()); - for (uint32_t i = 0; i < p_vector.size(); i++) { - T const &e = p_vector[i]; - write(e); - } - } - - void write(VectorView p_vector) { - write_buffer(p_vector.ptr(), p_vector.size()); - } - - template - void write(HashMap const &p_map) { - write(p_map.size()); - for (KeyValue const &e : p_map) { - write(e.key); - write(e.value); - } - } - - uint64_t get_pos() const { - return pos; - } - - uint64_t get_length() const { - return length; - } - -private: - void write_buffer(uint8_t const *p_buffer, uint32_t p_length) { - write(p_length); - - DEV_ASSERT(pos + p_length <= length); - memcpy(data + pos, p_buffer, p_length); - pos += p_length; - } -}; - -class BufReader; - -template -concept Deserializable = requires(T t, BufReader &p_reader) { - { - t.serialize_size() - } -> std::same_as; - { - t.deserialize(p_reader) - } -> std::same_as; -}; - -class BufReader { - uint8_t const *data = nullptr; - uint64_t length = 0; - uint64_t pos = 0; - - bool check_length(size_t p_size) { - if (status != Status::OK) { - return false; - } - - if (pos + p_size > length) { - status = Status::SHORT_BUFFER; - return false; - } - return true; - } - -#define CHECK(p_size) \ - if (!check_length(p_size)) \ - return - -public: - enum class Status { - OK, - SHORT_BUFFER, - BAD_COMPRESSION, - }; - - Status status = Status::OK; - - BufReader(uint8_t const *p_data, uint64_t p_length) : - data(p_data), length(p_length) {} - - template - void read(T &p_value) { - p_value.deserialize(*this); - } - - _FORCE_INLINE_ void read(uint32_t &p_val) { - CHECK(sizeof(uint32_t)); - - p_val = decode_uint32(data + pos); - pos += sizeof(uint32_t); - } - - _FORCE_INLINE_ void read(RD::ShaderStage &p_val) { - uint32_t val; - read(val); - p_val = (RD::ShaderStage)val; - } - - _FORCE_INLINE_ void read(bool &p_val) { - CHECK(sizeof(uint8_t)); - - p_val = *(data + pos) > 0; - pos += 1; - } - - _FORCE_INLINE_ void read(uint64_t &p_val) { - CHECK(sizeof(uint64_t)); - - p_val = decode_uint64(data + pos); - pos += sizeof(uint64_t); - } - - _FORCE_INLINE_ void read(float &p_val) { - CHECK(sizeof(float)); - - p_val = decode_float(data + pos); - pos += sizeof(float); - } - - _FORCE_INLINE_ void read(double &p_val) { - CHECK(sizeof(double)); - - p_val = decode_double(data + pos); - pos += sizeof(double); - } - - void read(CharString &p_val) { - uint32_t len; - read(len); - CHECK(len); - p_val.resize(len + 1 /* NUL */); - memcpy(p_val.ptrw(), data + pos, len); - p_val.set(len, 0); - pos += len; - } - - void read_compressed(CharString &p_val) { - uint32_t len; - read(len); - uint32_t comp_size; - read(comp_size); - - CHECK(comp_size); - - p_val.resize(len + 1 /* NUL */); - uint32_t bytes = (uint32_t)Compression::decompress(reinterpret_cast(p_val.ptrw()), len, data + pos, comp_size, Compression::MODE_ZSTD); - if (bytes != len) { - status = Status::BAD_COMPRESSION; - return; - } - p_val.set(len, 0); - pos += comp_size; - } - - void read(LocalVector &p_val) { - uint32_t len; - read(len); - CHECK(len); - p_val.resize(len); - memcpy(p_val.ptr(), data + pos, len); - pos += len; - } - - template - void read(LocalVector &p_val) { - uint32_t len; - 
read(len); - CHECK(len); - p_val.resize(len); - for (uint32_t i = 0; i < len; i++) { - read(p_val[i]); - } - } - - template - void read(HashMap &p_map) { - uint32_t len; - read(len); - CHECK(len); - p_map.reserve(len); - for (uint32_t i = 0; i < len; i++) { - K key; - read(key); - V value; - read(value); - p_map[key] = value; - } - } - -#undef CHECK -}; - -const uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535; - -struct ComputeSize { - uint32_t x = 0; - uint32_t y = 0; - uint32_t z = 0; - - size_t serialize_size() const { - return sizeof(uint32_t) * 3; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(x); - p_writer.write(y); - p_writer.write(z); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(x); - p_reader.read(y); - p_reader.read(z); - } -}; - -struct ShaderStageData { - RD::ShaderStage stage = RD::ShaderStage::SHADER_STAGE_MAX; - uint32_t is_position_invariant = UINT32_MAX; - uint32_t supports_fast_math = UINT32_MAX; - CharString entry_point_name; - CharString source; - - size_t serialize_size() const { - int comp_size = Compression::get_max_compressed_buffer_size(source.length(), Compression::MODE_ZSTD); - return sizeof(uint32_t) // Stage. - + sizeof(uint32_t) // is_position_invariant - + sizeof(uint32_t) // supports_fast_math - + sizeof(uint32_t) /* entry_point_name.utf8().length */ - + entry_point_name.length() + sizeof(uint32_t) /* uncompressed size */ + sizeof(uint32_t) /* compressed size */ + comp_size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write((uint32_t)stage); - p_writer.write(is_position_invariant); - p_writer.write(supports_fast_math); - p_writer.write(entry_point_name); - p_writer.write_compressed(source); - } - - void deserialize(BufReader &p_reader) { - p_reader.read((uint32_t &)stage); - p_reader.read(is_position_invariant); - p_reader.read(supports_fast_math); - p_reader.read(entry_point_name); - p_reader.read_compressed(source); - } -}; - -struct SpecializationConstantData { - uint32_t constant_id = UINT32_MAX; - RD::PipelineSpecializationConstantType type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; - ShaderStageUsage stages = ShaderStageUsage::None; - // Specifies the stages the constant is used by Metal. - ShaderStageUsage used_stages = ShaderStageUsage::None; - uint32_t int_value = UINT32_MAX; - - size_t serialize_size() const { - return sizeof(constant_id) + sizeof(uint32_t) // type - + sizeof(stages) + sizeof(used_stages) // used_stages - + sizeof(int_value); // int_value - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(constant_id); - p_writer.write((uint32_t)type); - p_writer.write(stages); - p_writer.write(used_stages); - p_writer.write(int_value); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(constant_id); - p_reader.read((uint32_t &)type); - p_reader.read((uint32_t &)stages); - p_reader.read((uint32_t &)used_stages); - p_reader.read(int_value); - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformData { - RD::UniformType type = RD::UniformType::UNIFORM_TYPE_MAX; - uint32_t binding = UINT32_MAX; - bool writable = false; - uint32_t length = UINT32_MAX; - ShaderStageUsage stages = ShaderStageUsage::None; - // Specifies the stages the uniform data is - // used by the Metal shader. 
- ShaderStageUsage active_stages = ShaderStageUsage::None; - BindingInfoMap bindings; - BindingInfoMap bindings_secondary; - - size_t serialize_size() const { - size_t size = 0; - size += sizeof(uint32_t); // type - size += sizeof(uint32_t); // binding - size += sizeof(uint32_t); // writable - size += sizeof(uint32_t); // length - size += sizeof(uint32_t); // stages - size += sizeof(uint32_t); // active_stages - size += sizeof(uint32_t); // bindings.size() - size += sizeof(uint32_t) * bindings.size(); // Total size of keys. - for (KeyValue const &e : bindings) { - size += e.value.serialize_size(); - } - size += sizeof(uint32_t); // bindings_secondary.size() - size += sizeof(uint32_t) * bindings_secondary.size(); // Total size of keys. - for (KeyValue const &e : bindings_secondary) { - size += e.value.serialize_size(); - } - return size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write((uint32_t)type); - p_writer.write(binding); - p_writer.write(writable); - p_writer.write(length); - p_writer.write(stages); - p_writer.write(active_stages); - p_writer.write(bindings); - p_writer.write(bindings_secondary); - } - - void deserialize(BufReader &p_reader) { - p_reader.read((uint32_t &)type); - p_reader.read(binding); - p_reader.read(writable); - p_reader.read(length); - p_reader.read((uint32_t &)stages); - p_reader.read((uint32_t &)active_stages); - p_reader.read(bindings); - p_reader.read(bindings_secondary); - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSetData { - uint32_t index = UINT32_MAX; - LocalVector uniforms; - - size_t serialize_size() const { - size_t size = 0; - size += sizeof(uint32_t); // index - size += sizeof(uint32_t); // uniforms.size() - for (UniformData const &e : uniforms) { - size += e.serialize_size(); - } - return size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(index); - p_writer.write(VectorView(uniforms)); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(index); - p_reader.read(uniforms); - } - UniformSetData() = default; - UniformSetData(uint32_t p_index) : - index(p_index) {} -}; - -struct PushConstantData { - uint32_t size = UINT32_MAX; - ShaderStageUsage stages = ShaderStageUsage::None; - ShaderStageUsage used_stages = ShaderStageUsage::None; - HashMap msl_binding; - - size_t serialize_size() const { - return sizeof(uint32_t) // size - + sizeof(uint32_t) // stages - + sizeof(uint32_t) // used_stages - + sizeof(uint32_t) // msl_binding.size() - + sizeof(uint32_t) * msl_binding.size() // keys - + sizeof(uint32_t) * msl_binding.size(); // values - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(size); - p_writer.write((uint32_t)stages); - p_writer.write((uint32_t)used_stages); - p_writer.write(msl_binding); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(size); - p_reader.read((uint32_t &)stages); - p_reader.read((uint32_t &)used_stages); - p_reader.read(msl_binding); - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) ShaderBinaryData { - enum Flags : uint32_t { - NONE = 0, - NEEDS_VIEW_MASK_BUFFER = 1 << 0, - USES_ARGUMENT_BUFFERS = 1 << 1, - }; - CharString shader_name; - // The Metal language version specified when compiling SPIR-V to MSL. - // Format is major * 10000 + minor * 100 + patch. 
- uint32_t msl_version = UINT32_MAX; - uint32_t vertex_input_mask = UINT32_MAX; - uint32_t fragment_output_mask = UINT32_MAX; - uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX; - uint32_t flags = NONE; - ComputeSize compute_local_size; - PushConstantData push_constant; - LocalVector stages; - LocalVector constants; - LocalVector uniforms; - - MTLLanguageVersion get_msl_version() const { - uint32_t major = msl_version / 10000; - uint32_t minor = (msl_version / 100) % 100; - return MTLLanguageVersion((major << 0x10) + minor); - } - - bool is_compute() const { - return std::any_of(stages.begin(), stages.end(), [](ShaderStageData const &e) { - return e.stage == RD::ShaderStage::SHADER_STAGE_COMPUTE; - }); - } - - bool needs_view_mask_buffer() const { - return flags & NEEDS_VIEW_MASK_BUFFER; - } - - void set_needs_view_mask_buffer(bool p_value) { - if (p_value) { - flags |= NEEDS_VIEW_MASK_BUFFER; - } else { - flags &= ~NEEDS_VIEW_MASK_BUFFER; - } - } - - bool uses_argument_buffers() const { - return flags & USES_ARGUMENT_BUFFERS; - } - - void set_uses_argument_buffers(bool p_value) { - if (p_value) { - flags |= USES_ARGUMENT_BUFFERS; - } else { - flags &= ~USES_ARGUMENT_BUFFERS; - } - } - - size_t serialize_size() const { - size_t size = 0; - size += sizeof(uint32_t) + shader_name.length(); // shader_name - size += sizeof(msl_version); - size += sizeof(vertex_input_mask); - size += sizeof(fragment_output_mask); - size += sizeof(spirv_specialization_constants_ids_mask); - size += sizeof(flags); - size += compute_local_size.serialize_size(); - size += push_constant.serialize_size(); - size += sizeof(uint32_t); // stages.size() - for (ShaderStageData const &e : stages) { - size += e.serialize_size(); - } - size += sizeof(uint32_t); // constants.size() - for (SpecializationConstantData const &e : constants) { - size += e.serialize_size(); - } - size += sizeof(uint32_t); // uniforms.size() - for (UniformSetData const &e : uniforms) { - size += e.serialize_size(); - } - return size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(shader_name); - p_writer.write(msl_version); - p_writer.write(vertex_input_mask); - p_writer.write(fragment_output_mask); - p_writer.write(spirv_specialization_constants_ids_mask); - p_writer.write(flags); - p_writer.write(compute_local_size); - p_writer.write(push_constant); - p_writer.write(VectorView(stages)); - p_writer.write(VectorView(constants)); - p_writer.write(VectorView(uniforms)); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(shader_name); - p_reader.read(msl_version); - p_reader.read(vertex_input_mask); - p_reader.read(fragment_output_mask); - p_reader.read(spirv_specialization_constants_ids_mask); - p_reader.read(flags); - p_reader.read(compute_local_size); - p_reader.read(push_constant); - p_reader.read(stages); - p_reader.read(constants); - p_reader.read(uniforms); - } -}; - -// endregion - -String RenderingDeviceDriverMetal::shader_get_binary_cache_key() { - static const String cache_key = "Metal-SV" + uitos(SHADER_BINARY_VERSION); - return cache_key; -} - -Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta) { - using namespace spirv_cross; - using spirv_cross::Resource; - - r_reflection = {}; - r_shader_meta = {}; - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStageSPIRVData const &v = p_spirv[i]; - ShaderStage stage = v.shader_stage; - uint32_t const *const ir = reinterpret_cast(v.spirv.ptr()); - size_t word_count 
= v.spirv.size() / sizeof(uint32_t); - Parser parser(ir, word_count); - try { - parser.parse(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); - - if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { - r_reflection.is_compute = true; - ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED, - "Compute shaders can only receive one stage, dedicated to compute."); - } - ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED, - "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); - - ParsedIR &pir = parser.get_parsed_ir(); - using BT = SPIRType::BaseType; - - Compiler compiler(std::move(pir)); - - if (r_reflection.is_compute) { - r_reflection.compute_local_size[0] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0); - r_reflection.compute_local_size[1] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 1); - r_reflection.compute_local_size[2] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 2); - } - - // Parse bindings. - - auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { - uint32_t res = -1; - if (compiler.has_decoration(id, decoration)) { - res = compiler.get_decoration(id, decoration); - } - return res; - }; - - // Always clearer than a boolean. - enum class Writable { - No, - Maybe, - }; - - // clang-format off - enum { - SPIRV_WORD_SIZE = sizeof(uint32_t), - SPIRV_DATA_ALIGNMENT = 4 * SPIRV_WORD_SIZE, - }; - // clang-format on - - auto process_uniforms = [&r_reflection, &compiler, &get_decoration, stage, stage_flag](SmallVector &resources, Writable writable, std::function uniform_type) { - for (Resource const &res : resources) { - ShaderUniform uniform; - - std::string const &name = compiler.get_name(res.id); - uint32_t set = get_decoration(res.id, spv::DecorationDescriptorSet); - ERR_FAIL_COND_V_MSG(set == (uint32_t)-1, FAILED, "No descriptor set found"); - ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, FAILED, "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."); - - uniform.binding = get_decoration(res.id, spv::DecorationBinding); - ERR_FAIL_COND_V_MSG(uniform.binding == (uint32_t)-1, FAILED, "No binding found"); - - SPIRType const &a_type = compiler.get_type(res.type_id); - uniform.type = uniform_type(a_type); - - // Update length. - switch (a_type.basetype) { - case BT::Struct: { - if (uniform.type == UNIFORM_TYPE_STORAGE_BUFFER) { - // Consistent with spirv_reflect. - uniform.length = 0; - } else { - uniform.length = round_up_to_alignment(compiler.get_declared_struct_size(a_type), SPIRV_DATA_ALIGNMENT); - } - } break; - case BT::Image: - case BT::Sampler: - case BT::SampledImage: { - uniform.length = 1; - for (uint32_t const &a : a_type.array) { - uniform.length *= a; - } - } break; - default: - break; - } - - // Update writable. 
- if (writable == Writable::Maybe) { - if (a_type.basetype == BT::Struct) { - Bitset flags = compiler.get_buffer_block_flags(res.id); - uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable) && !flags.get(spv::DecorationNonWritable); - } else if (a_type.basetype == BT::Image) { - if (a_type.image.access == spv::AccessQualifierMax) { - uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable); - } else { - uniform.writable = a_type.image.access != spv::AccessQualifierReadOnly; - } - } - } - - if (set < (uint32_t)r_reflection.uniform_sets.size()) { - // Check if this already exists. - bool exists = false; - for (uint32_t k = 0; k < r_reflection.uniform_sets[set].size(); k++) { - if (r_reflection.uniform_sets[set][k].binding == uniform.binding) { - // Already exists, verify that it's the same type. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].type != uniform.type, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform type."); - - // Also, verify that it's the same size. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].length != uniform.length, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform size."); - - // Also, verify that it has the same writability. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].writable != uniform.writable, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability."); - - // Just append stage mask and continue. - r_reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag); - exists = true; - break; - } - } - - if (exists) { - continue; // Merged. 
- } - } - - uniform.stages.set_flag(stage_flag); - - if (set >= (uint32_t)r_reflection.uniform_sets.size()) { - r_reflection.uniform_sets.resize(set + 1); - } - - r_reflection.uniform_sets.write[set].push_back(uniform); - } - - return OK; - }; - - ShaderResources resources = compiler.get_shader_resources(); - - process_uniforms(resources.uniform_buffers, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Struct); - return UNIFORM_TYPE_UNIFORM_BUFFER; - }); - - process_uniforms(resources.storage_buffers, Writable::Maybe, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Struct); - return UNIFORM_TYPE_STORAGE_BUFFER; - }); - - process_uniforms(resources.storage_images, Writable::Maybe, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Image); - if (a_type.image.dim == spv::DimBuffer) { - return UNIFORM_TYPE_IMAGE_BUFFER; - } else { - return UNIFORM_TYPE_IMAGE; - } - }); - - process_uniforms(resources.sampled_images, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::SampledImage); - return UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - }); - - process_uniforms(resources.separate_images, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Image); - if (a_type.image.dim == spv::DimBuffer) { - return UNIFORM_TYPE_TEXTURE_BUFFER; - } else { - return UNIFORM_TYPE_TEXTURE; - } - }); - - process_uniforms(resources.separate_samplers, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Sampler); - return UNIFORM_TYPE_SAMPLER; - }); - - process_uniforms(resources.subpass_inputs, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Image && a_type.image.dim == spv::DimSubpassData); - return UNIFORM_TYPE_INPUT_ATTACHMENT; - }); - - if (!resources.push_constant_buffers.empty()) { - // There can be only one push constant block. 
- Resource const &res = resources.push_constant_buffers.front(); - - size_t push_constant_size = round_up_to_alignment(compiler.get_declared_struct_size(compiler.get_type(res.base_type_id)), SPIRV_DATA_ALIGNMENT); - ERR_FAIL_COND_V_MSG(r_reflection.push_constant_size && r_reflection.push_constant_size != push_constant_size, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages."); - - r_reflection.push_constant_size = push_constant_size; - r_reflection.push_constant_stages.set_flag(stage_flag); - } - - ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), FAILED, "Atomic counters not supported"); - ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), FAILED, "Acceleration structures not supported"); - ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), FAILED, "Shader record buffers not supported"); - - if (stage == SHADER_STAGE_VERTEX && !resources.stage_inputs.empty()) { - for (Resource const &res : resources.stage_inputs) { - SPIRType a_type = compiler.get_type(res.base_type_id); - uint32_t loc = get_decoration(res.id, spv::DecorationLocation); - if (loc != (uint32_t)-1) { - r_reflection.vertex_input_mask |= 1 << loc; - } - } - } - - if (stage == SHADER_STAGE_FRAGMENT && !resources.stage_outputs.empty()) { - for (Resource const &res : resources.stage_outputs) { - SPIRType a_type = compiler.get_type(res.base_type_id); - uint32_t loc = get_decoration(res.id, spv::DecorationLocation); - uint32_t built_in = spv::BuiltIn(get_decoration(res.id, spv::DecorationBuiltIn)); - if (loc != (uint32_t)-1 && built_in != spv::BuiltInFragDepth) { - r_reflection.fragment_output_mask |= 1 << loc; - } - } - } - - for (const BuiltInResource &res : resources.builtin_inputs) { - if (res.builtin == spv::BuiltInViewIndex || res.builtin == spv::BuiltInViewportIndex) { - r_shader_meta.has_multiview = true; - } - } - - if (!r_shader_meta.has_multiview) { - for (const BuiltInResource &res : resources.builtin_outputs) { - if (res.builtin == spv::BuiltInViewIndex || res.builtin == spv::BuiltInViewportIndex) { - r_shader_meta.has_multiview = true; - } - } - } - - // Specialization constants. 
- for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { - int32_t existing = -1; - ShaderSpecializationConstant sconst; - SPIRConstant &spc = compiler.get_constant(constant.id); - SPIRType const &spct = compiler.get_type(spc.constant_type); - - sconst.constant_id = constant.constant_id; - sconst.int_value = 0; - - switch (spct.basetype) { - case BT::Boolean: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; - sconst.bool_value = spc.scalar() != 0; - } break; - case BT::Int: - case BT::UInt: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; - sconst.int_value = spc.scalar(); - } break; - case BT::Float: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; - sconst.float_value = spc.scalar_f32(); - } break; - default: - ERR_FAIL_V_MSG(FAILED, "Unsupported specialization constant type"); - } - sconst.stages.set_flag(stage_flag); - - for (uint32_t k = 0; k < r_reflection.specialization_constants.size(); k++) { - if (r_reflection.specialization_constants[k].constant_id == sconst.constant_id) { - ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ."); - ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ."); - existing = k; - break; - } - } - - if (existing > 0) { - r_reflection.specialization_constants.write[existing].stages.set_flag(stage_flag); - } else { - r_reflection.specialization_constants.push_back(sconst); - } - } - - r_reflection.stages.set_flag(stage_flag); - } - - // Sort all uniform_sets. 
- for (uint32_t i = 0; i < r_reflection.uniform_sets.size(); i++) { - r_reflection.uniform_sets.write[i].sort(); - } - - return OK; -} - -Vector RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) { - using Result = ::Vector; - using namespace spirv_cross; - using spirv_cross::CompilerMSL; - using spirv_cross::Resource; - - ShaderReflection spirv_data; - ShaderMeta shader_meta; - ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data, shader_meta), Result()); - - ShaderBinaryData bin_data{}; - if (!p_shader_name.is_empty()) { - bin_data.shader_name = p_shader_name.utf8(); - } else { - bin_data.shader_name = "unnamed"; - } - - bin_data.vertex_input_mask = spirv_data.vertex_input_mask; - bin_data.fragment_output_mask = spirv_data.fragment_output_mask; - bin_data.compute_local_size = ComputeSize{ - .x = spirv_data.compute_local_size[0], - .y = spirv_data.compute_local_size[1], - .z = spirv_data.compute_local_size[2], - }; - bin_data.push_constant.size = spirv_data.push_constant_size; - bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages; - bin_data.set_needs_view_mask_buffer(shader_meta.has_multiview); - - for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) { - const ::Vector &spirv_set = spirv_data.uniform_sets[i]; - UniformSetData set(i); - for (const ShaderUniform &spirv_uniform : spirv_set) { - UniformData binding{}; - binding.type = spirv_uniform.type; - binding.binding = spirv_uniform.binding; - binding.writable = spirv_uniform.writable; - binding.stages = (ShaderStageUsage)(uint8_t)spirv_uniform.stages; - binding.length = spirv_uniform.length; - set.uniforms.push_back(binding); - } - bin_data.uniforms.push_back(set); - } - - for (const ShaderSpecializationConstant &spirv_sc : spirv_data.specialization_constants) { - SpecializationConstantData spec_constant{}; - spec_constant.type = spirv_sc.type; - spec_constant.constant_id = spirv_sc.constant_id; - spec_constant.int_value = spirv_sc.int_value; - spec_constant.stages = (ShaderStageUsage)(uint8_t)spirv_sc.stages; - bin_data.constants.push_back(spec_constant); - bin_data.spirv_specialization_constants_ids_mask |= (1 << spirv_sc.constant_id); - } - - // Reflection using SPIRV-Cross: - // https://github.com/KhronosGroup/SPIRV-Cross/wiki/Reflection-API-user-guide - - CompilerMSL::Options msl_options{}; - msl_options.set_msl_version(version_major, version_minor); - bin_data.msl_version = msl_options.msl_version; -#if TARGET_OS_OSX - msl_options.platform = CompilerMSL::Options::macOS; -#else - msl_options.platform = CompilerMSL::Options::iOS; -#endif - -#if TARGET_OS_IPHONE - msl_options.ios_use_simdgroup_functions = (*device_properties).features.simdPermute; - msl_options.ios_support_base_vertex_instance = true; -#endif - - bool disable_argument_buffers = false; - if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") { - disable_argument_buffers = true; - } - - if (device_properties->features.argument_buffers_tier >= MTLArgumentBuffersTier2 && !disable_argument_buffers) { - msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; - msl_options.argument_buffers = true; - bin_data.set_uses_argument_buffers(true); - } else { - msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1; - // Tier 1 argument buffers don't support writable textures, so we disable them completely. 
- msl_options.argument_buffers = false; - bin_data.set_uses_argument_buffers(false); - } - msl_options.force_active_argument_buffer_resources = true; - // We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding. - // msl_options.pad_argument_buffer_resources = true; - msl_options.texture_buffer_native = true; // Enable texture buffer support. - msl_options.use_framebuffer_fetch_subpasses = false; - msl_options.pad_fragment_output_components = true; - msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID; - msl_options.agx_manual_cube_grad_fixup = true; - if (shader_meta.has_multiview) { - msl_options.multiview = true; - msl_options.multiview_layered_rendering = true; - msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX; - } - - CompilerGLSL::Options options{}; - options.vertex.flip_vert_y = true; -#if DEV_ENABLED - options.emit_line_directives = true; -#endif - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStageSPIRVData const &v = p_spirv[i]; - ShaderStage stage = v.shader_stage; - char const *stage_name = SHADER_STAGE_NAMES[stage]; - uint32_t const *const ir = reinterpret_cast(v.spirv.ptr()); - size_t word_count = v.spirv.size() / sizeof(uint32_t); - Parser parser(ir, word_count); - try { - parser.parse(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(Result(), "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - CompilerMSL compiler(std::move(parser.get_parsed_ir())); - compiler.set_msl_options(msl_options); - compiler.set_common_options(options); - - std::unordered_set active = compiler.get_active_interface_variables(); - ShaderResources resources = compiler.get_shader_resources(); - - std::string source; - try { - source = compiler.compile(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(Result(), "Failed to compile stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, Result(), "Expected a single entry point and stage."); - - SmallVector entry_pts_stages = compiler.get_entry_points_and_stages(); - EntryPoint &entry_point_stage = entry_pts_stages.front(); - SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); - - // Process specialization constants. - if (!compiler.get_specialization_constants().empty()) { - for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { - LocalVector::Iterator res = bin_data.constants.begin(); - while (res != bin_data.constants.end()) { - if (res->constant_id == constant.constant_id) { - res->used_stages |= 1 << stage; - break; - } - ++res; - } - if (res == bin_data.constants.end()) { - WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id)); - } - } - } - - // Process bindings. - - LocalVector &uniform_sets = bin_data.uniforms; - using BT = SPIRType::BaseType; - - // Always clearer than a boolean. - enum class Writable { - No, - Maybe, - }; - - // Returns a std::optional containing the value of the - // decoration, if it exists. 
- auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { - uint32_t res = -1; - if (compiler.has_decoration(id, decoration)) { - res = compiler.get_decoration(id, decoration); - } - return res; - }; - - auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector &p_resources, Writable p_writable) { - for (Resource const &res : p_resources) { - uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); - uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); - UniformData *found = nullptr; - if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets.size()) { - UniformSetData &set = uniform_sets[dset]; - LocalVector::Iterator pos = set.uniforms.begin(); - while (pos != set.uniforms.end()) { - if (dbin == pos->binding) { - found = &(*pos); - break; - } - ++pos; - } - } - - ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found"); - - bool is_active = active.find(res.id) != active.end(); - if (is_active) { - found->active_stages |= 1 << stage; - } - - BindingInfo primary{}; - - SPIRType const &a_type = compiler.get_type(res.type_id); - BT basetype = a_type.basetype; - - switch (basetype) { - case BT::Struct: { - primary.dataType = MTLDataTypePointer; - } break; - - case BT::Image: - case BT::SampledImage: { - primary.dataType = MTLDataTypeTexture; - } break; - - case BT::Sampler: { - primary.dataType = MTLDataTypeSampler; - primary.arrayLength = 1; - for (uint32_t const &a : a_type.array) { - primary.arrayLength *= a; - } - } break; - - default: { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType"); - } break; - } - - // Find array length of image. - if (basetype == BT::Image || basetype == BT::SampledImage) { - primary.arrayLength = 1; - for (uint32_t const &a : a_type.array) { - primary.arrayLength *= a; - } - primary.isMultisampled = a_type.image.ms; - - SPIRType::ImageType const &image = a_type.image; - primary.imageFormat = image.format; - - switch (image.dim) { - case spv::Dim1D: { - if (image.arrayed) { - primary.textureType = MTLTextureType1DArray; - } else { - primary.textureType = MTLTextureType1D; - } - } break; - case spv::DimSubpassData: { - DISPATCH_FALLTHROUGH; - } - case spv::Dim2D: { - if (image.arrayed && image.ms) { - primary.textureType = MTLTextureType2DMultisampleArray; - } else if (image.arrayed) { - primary.textureType = MTLTextureType2DArray; - } else if (image.ms) { - primary.textureType = MTLTextureType2DMultisample; - } else { - primary.textureType = MTLTextureType2D; - } - } break; - case spv::Dim3D: { - primary.textureType = MTLTextureType3D; - } break; - case spv::DimCube: { - if (image.arrayed) { - primary.textureType = MTLTextureTypeCube; - } - } break; - case spv::DimRect: { - } break; - case spv::DimBuffer: { - // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER - primary.textureType = MTLTextureTypeTextureBuffer; - } break; - case spv::DimMax: { - // Add all enumerations to silence the compiler warning - // and generate future warnings, should a new one be added. - } break; - } - } - - // Update writable. 
- if (p_writable == Writable::Maybe) { - if (basetype == BT::Struct) { - Bitset flags = compiler.get_buffer_block_flags(res.id); - if (!flags.get(spv::DecorationNonWritable)) { - if (flags.get(spv::DecorationNonReadable)) { - primary.access = MTLBindingAccessWriteOnly; - } else { - primary.access = MTLBindingAccessReadWrite; - } - } - } else if (basetype == BT::Image) { - switch (a_type.image.access) { - case spv::AccessQualifierWriteOnly: - primary.access = MTLBindingAccessWriteOnly; - break; - case spv::AccessQualifierReadWrite: - primary.access = MTLBindingAccessReadWrite; - break; - case spv::AccessQualifierReadOnly: - break; - case spv::AccessQualifierMax: - DISPATCH_FALLTHROUGH; - default: - if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) { - if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) { - primary.access = MTLBindingAccessWriteOnly; - } else { - primary.access = MTLBindingAccessReadWrite; - } - } - break; - } - } - } - - switch (primary.access) { - case MTLBindingAccessReadOnly: - primary.usage = MTLResourceUsageRead; - break; - case MTLBindingAccessWriteOnly: - primary.usage = MTLResourceUsageWrite; - break; - case MTLBindingAccessReadWrite: - primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite; - break; - } - - primary.index = compiler.get_automatic_msl_resource_binding(res.id); - - found->bindings[stage] = primary; - - // A sampled image contains two bindings, the primary - // is to the image, and the secondary is to the associated sampler. - if (basetype == BT::SampledImage) { - uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); - if (binding != (uint32_t)-1) { - found->bindings_secondary[stage] = BindingInfo{ - .dataType = MTLDataTypeSampler, - .index = binding, - .access = MTLBindingAccessReadOnly, - }; - } - } - - // An image may have a secondary binding if it is used - // for atomic operations. 
- if (basetype == BT::Image) { - uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); - if (binding != (uint32_t)-1) { - found->bindings_secondary[stage] = BindingInfo{ - .dataType = MTLDataTypePointer, - .index = binding, - .access = MTLBindingAccessReadWrite, - }; - } - } - } - return Error::OK; - }; - - if (!resources.uniform_buffers.empty()) { - Error err = descriptor_bindings(resources.uniform_buffers, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.storage_buffers.empty()) { - Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.storage_images.empty()) { - Error err = descriptor_bindings(resources.storage_images, Writable::Maybe); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.sampled_images.empty()) { - Error err = descriptor_bindings(resources.sampled_images, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.separate_images.empty()) { - Error err = descriptor_bindings(resources.separate_images, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.separate_samplers.empty()) { - Error err = descriptor_bindings(resources.separate_samplers, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.subpass_inputs.empty()) { - Error err = descriptor_bindings(resources.subpass_inputs, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - - if (!resources.push_constant_buffers.empty()) { - for (Resource const &res : resources.push_constant_buffers) { - uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); - if (binding != (uint32_t)-1) { - bin_data.push_constant.used_stages |= 1 << stage; - bin_data.push_constant.msl_binding[stage] = binding; - } - } - } - - ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), Result(), "Atomic counters not supported"); - ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), Result(), "Acceleration structures not supported"); - ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), Result(), "Shader record buffers not supported"); - - if (!resources.stage_inputs.empty()) { - for (Resource const &res : resources.stage_inputs) { - uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); - if (binding != (uint32_t)-1) { - bin_data.vertex_input_mask |= 1 << binding; - } - } - } - - ShaderStageData stage_data; - stage_data.stage = v.shader_stage; - stage_data.is_position_invariant = compiler.is_position_invariant(); - stage_data.supports_fast_math = !entry_point.flags.get(spv::ExecutionModeSignedZeroInfNanPreserve); - stage_data.entry_point_name = entry_point.name.c_str(); - stage_data.source = source.c_str(); - bin_data.stages.push_back(stage_data); - } - - size_t vec_size = bin_data.serialize_size() + 8; - - ::Vector ret; - ret.resize(vec_size); - BufWriter writer(ret.ptrw(), vec_size); - const uint8_t HEADER[4] = { 'G', 'M', 'S', 'L' }; - writer.write(*(uint32_t *)HEADER); - writer.write(SHADER_BINARY_VERSION); - bin_data.serialize(writer); - ret.resize(writer.get_pos()); - - return ret; -} - void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key) { if (ShaderCacheEntry **pentry = _shader_cache.getptr(key); pentry != nullptr) { ShaderCacheEntry *entry = *pentry; @@ -2447,115 +1105,146 @@ void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key } } -RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vector 
&p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) { - r_shader_desc = {}; // Driver-agnostic. +API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) +static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::BindingInfoData &p_data) { + BindingInfo bi; + bi.dataType = static_cast(p_data.data_type); + bi.index = p_data.index; + bi.access = static_cast(p_data.access); + bi.usage = static_cast(p_data.usage); + bi.textureType = static_cast(p_data.texture_type); + bi.imageFormat = p_data.image_format; + bi.arrayLength = p_data.array_length; + bi.isMultisampled = p_data.is_multisampled; + return bi; +} - const uint8_t *binptr = p_shader_binary.ptr(); - uint32_t binsize = p_shader_binary.size(); +RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { + Ref shader_container = p_shader_container; + using RSCM = RenderingShaderContainerMetal; - BufReader reader(binptr, binsize); - uint8_t header[4]; - reader.read((uint32_t &)header); - ERR_FAIL_COND_V_MSG(memcmp(header, "GMSL", 4) != 0, ShaderID(), "Invalid header"); - uint32_t version = 0; - reader.read(version); - ERR_FAIL_COND_V_MSG(version != SHADER_BINARY_VERSION, ShaderID(), "Invalid shader binary version"); - - ShaderBinaryData binary_data; - binary_data.deserialize(reader); - switch (reader.status) { - case BufReader::Status::OK: - break; - case BufReader::Status::BAD_COMPRESSION: - ERR_FAIL_V_MSG(ShaderID(), "Invalid compressed data"); - case BufReader::Status::SHORT_BUFFER: - ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer"); - } + CharString shader_name = shader_container->shader_name; + RSCM::HeaderData &mtl_reflection_data = shader_container->mtl_reflection_data; + Vector &shaders = shader_container->shaders; + Vector &mtl_shaders = shader_container->mtl_shaders; // We need to regenerate the shader if the cache is moved to an incompatible device. 
- ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && binary_data.uses_argument_buffers(), - ShaderID(), + ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && mtl_reflection_data.uses_argument_buffers(), + RDD::ShaderID(), "Shader was generated with argument buffers, but device has limited support"); MTLCompileOptions *options = [MTLCompileOptions new]; - options.languageVersion = binary_data.get_msl_version(); - HashMap libraries; + uint32_t major = mtl_reflection_data.msl_version / 10000; + uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100; + options.languageVersion = MTLLanguageVersion((major << 0x10) + minor); + HashMap libraries; - r_name = String(binary_data.shader_name.ptr()); + bool is_compute = false; + Vector decompressed_code; + for (uint32_t shader_index = 0; shader_index < shaders.size(); shader_index++) { + const RenderingShaderContainer::Shader &shader = shaders[shader_index]; + const RSCM::StageData &shader_data = mtl_shaders[shader_index]; - for (ShaderStageData &shader_data : binary_data.stages) { - r_shader_desc.stages.push_back(shader_data.stage); + if (shader.shader_stage == RD::ShaderStage::SHADER_STAGE_COMPUTE) { + is_compute = true; + } - SHA256Digest key = SHA256Digest(shader_data.source.ptr(), shader_data.source.length()); - - if (ShaderCacheEntry **p = _shader_cache.getptr(key); p != nullptr) { - libraries[shader_data.stage] = (*p)->library; + if (ShaderCacheEntry **p = _shader_cache.getptr(shader_data.hash); p != nullptr) { + libraries[shader.shader_stage] = (*p)->library; continue; } - NSString *source = [[NSString alloc] initWithBytes:(void *)shader_data.source.ptr() - length:shader_data.source.length() + if (shader.code_decompressed_size > 0) { + decompressed_code.resize(shader.code_decompressed_size); + bool decompressed = shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size()); + ERR_FAIL_COND_V_MSG(!decompressed, RDD::ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(RDD::SHADER_STAGE_NAMES[shader.shader_stage]))); + } else { + decompressed_code = shader.code_compressed_bytes; + } + + ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, shader_data.hash)); + cd->name = shader_name; + cd->stage = shader.shader_stage; + + NSString *source = [[NSString alloc] initWithBytes:(void *)decompressed_code.ptr() + length:shader_data.source_size encoding:NSUTF8StringEncoding]; - ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, key)); - cd->name = binary_data.shader_name; - cd->stage = shader_data.stage; - options.preserveInvariance = shader_data.is_position_invariant; -#if defined(VISIONOS_ENABLED) - options.mathMode = MTLMathModeFast; -#else - options.fastMathEnabled = YES; + MDLibrary *library = nil; + if (shader_data.library_size > 0) { + dispatch_data_t binary = dispatch_data_create(decompressed_code.ptr() + shader_data.source_size, shader_data.library_size, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT); + library = [MDLibrary newLibraryWithCacheEntry:cd + device:device +#if DEV_ENABLED + source:source #endif - MDLibrary *library = [MDLibrary newLibraryWithCacheEntry:cd - device:device - source:source - options:options - strategy:_shader_load_strategy]; - _shader_cache[key] = cd; - libraries[shader_data.stage] = library; + data:binary]; + } else { + options.preserveInvariance = 
shader_data.is_position_invariant; +#if defined(VISIONOS_ENABLED) + options.mathMode = MTLMathModeFast; +#else + options.fastMathEnabled = YES; +#endif + library = [MDLibrary newLibraryWithCacheEntry:cd + device:device + source:source + options:options + strategy:_shader_load_strategy]; + } + + _shader_cache[shader_data.hash] = cd; + libraries[shader.shader_stage] = library; } - Vector uniform_sets; - uniform_sets.resize(binary_data.uniforms.size()); + ShaderReflection refl = shader_container->get_shader_reflection(); + RSCM::MetalShaderReflection mtl_refl = shader_container->get_metal_shader_reflection(); - r_shader_desc.uniform_sets.resize(binary_data.uniforms.size()); + Vector uniform_sets; + uint32_t uniform_sets_count = mtl_refl.uniform_sets.size(); + uniform_sets.resize(uniform_sets_count); // Create sets. - for (UniformSetData &uniform_set : binary_data.uniforms) { - UniformSet &set = uniform_sets.write[uniform_set.index]; - set.uniforms.resize(uniform_set.uniforms.size()); + for (uint32_t i = 0; i < uniform_sets_count; i++) { + UniformSet &set = uniform_sets.write[i]; + const Vector &refl_set = refl.uniform_sets.ptr()[i]; + const Vector &mtl_set = mtl_refl.uniform_sets.ptr()[i]; + uint32_t set_size = mtl_set.size(); + set.uniforms.resize(set_size); - Vector &uset = r_shader_desc.uniform_sets.write[uniform_set.index]; - uset.resize(uniform_set.uniforms.size()); + LocalVector::Iterator iter = set.uniforms.begin(); + for (uint32_t j = 0; j < set_size; j++) { + const ShaderUniform &uniform = refl_set.ptr()[j]; + const RSCM::UniformData &bind = mtl_set.ptr()[j]; - for (uint32_t i = 0; i < uniform_set.uniforms.size(); i++) { - UniformData &uniform = uniform_set.uniforms[i]; - - ShaderUniform su; - su.type = uniform.type; - su.writable = uniform.writable; - su.length = uniform.length; - su.binding = uniform.binding; - su.stages = (ShaderStage)(uint8_t)uniform.stages; - uset.write[i] = su; - - UniformInfo &ui = set.uniforms[i]; + UniformInfo &ui = *iter; + ++iter; ui.binding = uniform.binding; - ui.active_stages = uniform.active_stages; - for (KeyValue &kv : uniform.bindings) { - ui.bindings.insert(kv.key, kv.value); + ui.active_stages = static_cast(bind.active_stages); + + for (const RSCM::BindingInfoData &info : bind.bindings) { + if (info.shader_stage == UINT32_MAX) { + continue; + } + BindingInfo bi = from_binding_info_data(info); + ui.bindings.insert((RDC::ShaderStage)info.shader_stage, bi); } - for (KeyValue &kv : uniform.bindings_secondary) { - ui.bindings_secondary.insert(kv.key, kv.value); + for (const RSCM::BindingInfoData &info : bind.bindings_secondary) { + if (info.shader_stage == UINT32_MAX) { + continue; + } + BindingInfo bi = from_binding_info_data(info); + ui.bindings_secondary.insert((RDC::ShaderStage)info.shader_stage, bi); } } } - for (UniformSetData &uniform_set : binary_data.uniforms) { - UniformSet &set = uniform_sets.write[uniform_set.index]; + + for (uint32_t i = 0; i < uniform_sets_count; i++) { + UniformSet &set = uniform_sets.write[i]; // Make encoders. 
- for (ShaderStageData const &stage_data : binary_data.stages) { - ShaderStage stage = stage_data.stage; + for (RenderingShaderContainer::Shader const &shader : shaders) { + RD::ShaderStage stage = shader.shader_stage; NSMutableArray *descriptors = [NSMutableArray new]; for (UniformInfo const &uniform : set.uniforms) { @@ -2593,78 +1282,56 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect } } - r_shader_desc.specialization_constants.resize(binary_data.constants.size()); - for (uint32_t i = 0; i < binary_data.constants.size(); i++) { - SpecializationConstantData &c = binary_data.constants[i]; - - ShaderSpecializationConstant sc; - sc.type = c.type; - sc.constant_id = c.constant_id; - sc.int_value = c.int_value; - sc.stages = (ShaderStage)(uint8_t)c.stages; - r_shader_desc.specialization_constants.write[i] = sc; - } - MDShader *shader = nullptr; - if (binary_data.is_compute()) { - MDComputeShader *cs = new MDComputeShader( - binary_data.shader_name, - uniform_sets, - binary_data.uses_argument_buffers(), - libraries[ShaderStage::SHADER_STAGE_COMPUTE]); + if (is_compute) { + const RSCM::StageData &stage_data = mtl_shaders[0]; - uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE); - if (binding) { - cs->push_constants.size = binary_data.push_constant.size; - cs->push_constants.binding = *binding; + MDComputeShader *cs = new MDComputeShader( + shader_name, + uniform_sets, + mtl_reflection_data.uses_argument_buffers(), + libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]); + + if (stage_data.push_constant_binding != UINT32_MAX) { + cs->push_constants.size = refl.push_constant_size; + cs->push_constants.binding = stage_data.push_constant_binding; } - cs->local = MTLSizeMake(binary_data.compute_local_size.x, binary_data.compute_local_size.y, binary_data.compute_local_size.z); -#if DEV_ENABLED - cs->kernel_source = binary_data.stages[0].source; -#endif + cs->local = MTLSizeMake(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]); shader = cs; } else { MDRenderShader *rs = new MDRenderShader( - binary_data.shader_name, + shader_name, uniform_sets, - binary_data.needs_view_mask_buffer(), - binary_data.uses_argument_buffers(), - libraries[ShaderStage::SHADER_STAGE_VERTEX], - libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); + mtl_reflection_data.needs_view_mask_buffer(), + mtl_reflection_data.uses_argument_buffers(), + libraries[RD::ShaderStage::SHADER_STAGE_VERTEX], + libraries[RD::ShaderStage::SHADER_STAGE_FRAGMENT]); - uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX); - if (vert_binding) { - rs->push_constants.vert.size = binary_data.push_constant.size; - rs->push_constants.vert.binding = *vert_binding; - } - uint32_t *frag_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_FRAGMENT); - if (frag_binding) { - rs->push_constants.frag.size = binary_data.push_constant.size; - rs->push_constants.frag.binding = *frag_binding; - } - -#if DEV_ENABLED - for (ShaderStageData &stage_data : binary_data.stages) { - if (stage_data.stage == ShaderStage::SHADER_STAGE_VERTEX) { - rs->vert_source = stage_data.source; - } else if (stage_data.stage == ShaderStage::SHADER_STAGE_FRAGMENT) { - rs->frag_source = stage_data.source; + for (uint32_t j = 0; j < shaders.size(); j++) { + const RSCM::StageData &stage_data = mtl_shaders[j]; + switch (shaders[j].shader_stage) { + case RD::ShaderStage::SHADER_STAGE_VERTEX: { + if (stage_data.push_constant_binding != UINT32_MAX) { 
+ rs->push_constants.vert.size = refl.push_constant_size; + rs->push_constants.vert.binding = stage_data.push_constant_binding; + } + } break; + case RD::ShaderStage::SHADER_STAGE_FRAGMENT: { + if (stage_data.push_constant_binding != UINT32_MAX) { + rs->push_constants.frag.size = refl.push_constant_size; + rs->push_constants.frag.binding = stage_data.push_constant_binding; + } + } break; + default: { + ERR_FAIL_V_MSG(RDD::ShaderID(), "Invalid shader stage"); + } break; } } -#endif shader = rs; } - r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; - r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute(); - r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x; - r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y; - r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z; - r_shader_desc.push_constant_size = binary_data.push_constant.size; - - return ShaderID(shader); + return RDD::ShaderID(shader); } void RenderingDeviceDriverMetal::shader_free(ShaderID p_shader) { @@ -4088,7 +2755,7 @@ const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragm } String RenderingDeviceDriverMetal::get_api_version() const { - return vformat("%d.%d", version_major, version_minor); + return vformat("%d.%d", capabilities.version_major, capabilities.version_minor); } String RenderingDeviceDriverMetal::get_pipeline_cache_uuid() const { @@ -4136,6 +2803,18 @@ RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() { for (KeyValue &kv : _shader_cache) { memdelete(kv.value); } + + if (shader_container_format != nullptr) { + memdelete(shader_container_format); + } + + if (pixel_formats != nullptr) { + memdelete(pixel_formats); + } + + if (device_properties != nullptr) { + memdelete(device_properties); + } } #pragma mark - Initialization @@ -4155,16 +2834,69 @@ Error RenderingDeviceDriverMetal::_create_device() { return OK; } -Error RenderingDeviceDriverMetal::_check_capabilities() { - MTLCompileOptions *options = [MTLCompileOptions new]; - version_major = (options.languageVersion >> 0x10) & 0xff; - version_minor = (options.languageVersion >> 0x00) & 0xff; - +void RenderingDeviceDriverMetal::_check_capabilities() { capabilities.device_family = DEVICE_METAL; - capabilities.version_major = version_major; - capabilities.version_minor = version_minor; + capabilities.version_major = device_properties->features.mslVersionMajor; + capabilities.version_minor = device_properties->features.mslVersionMinor; +} - return OK; +API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) +static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) { + using DP = MetalDeviceProfile; + MetalDeviceProfile res; +#if TARGET_OS_OSX + res.platform = DP::Platform::macOS; + res.features = { + .mslVersionMajor = p_device_properties->features.mslVersionMajor, + .mslVersionMinor = p_device_properties->features.mslVersionMinor, + .argument_buffers_tier = DP::ArgumentBuffersTier::Tier2, + .simdPermute = true + }; +#else + res.platform = DP::Platform::iOS; + res.features = { + .mslVersionMajor = p_device_properties->features.mslVersionMajor, + .mslVersionMinor = p_device_properties->features.mslVersionMinor, + .argument_buffers_tier = p_device_properties->features.argument_buffers_tier == MTLArgumentBuffersTier1 ? 
DP::ArgumentBuffersTier::Tier1 : DP::ArgumentBuffersTier::Tier2, + .simdPermute = p_device_properties->features.simdPermute, + }; +#endif + // highestFamily will only be set to an Apple GPU family + switch (p_device_properties->features.highestFamily) { + case MTLGPUFamilyApple1: + res.gpu = DP::GPU::Apple1; + break; + case MTLGPUFamilyApple2: + res.gpu = DP::GPU::Apple2; + break; + case MTLGPUFamilyApple3: + res.gpu = DP::GPU::Apple3; + break; + case MTLGPUFamilyApple4: + res.gpu = DP::GPU::Apple4; + break; + case MTLGPUFamilyApple5: + res.gpu = DP::GPU::Apple5; + break; + case MTLGPUFamilyApple6: + res.gpu = DP::GPU::Apple6; + break; + case MTLGPUFamilyApple7: + res.gpu = DP::GPU::Apple7; + break; + case MTLGPUFamilyApple8: + res.gpu = DP::GPU::Apple8; + break; + case MTLGPUFamilyApple9: + res.gpu = DP::GPU::Apple9; + break; + default: { + // Programming error if the default case is hit. + CRASH_NOW_MSG("Unsupported GPU family"); + } break; + } + + return res; } Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p_frame_count) { @@ -4172,13 +2904,15 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p Error err = _create_device(); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - err = _check_capabilities(); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + device_properties = memnew(MetalDeviceProperties(device)); + device_profile = device_profile_from_properties(device_properties); + shader_container_format = memnew(RenderingShaderContainerFormatMetal(&device_profile)); + + _check_capabilities(); // Set the pipeline cache ID based on the Metal version. pipeline_cache_id = "metal-driver-" + get_api_version(); - device_properties = memnew(MetalDeviceProperties(device)); pixel_formats = memnew(PixelFormats(device, device_properties->features)); if (device_properties->features.layeredRendering) { multiview_capabilities.is_supported = true; @@ -4212,3 +2946,7 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p return OK; } + +const RenderingShaderContainerFormat &RenderingDeviceDriverMetal::get_shader_container_format() const { + return *shader_container_format; +} diff --git a/drivers/metal/rendering_shader_container_metal.h b/drivers/metal/rendering_shader_container_metal.h new file mode 100644 index 0000000000..29c49ba097 --- /dev/null +++ b/drivers/metal/rendering_shader_container_metal.h @@ -0,0 +1,267 @@ +/**************************************************************************/ +/* rendering_shader_container_metal.h */ +/**************************************************************************/ +/* This file is part of: */ +/* REDOT ENGINE */ +/* https://redotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#import "sha256_digest.h" + +#import "servers/rendering/rendering_device_driver.h" +#import "servers/rendering/rendering_shader_container.h" + +constexpr uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535; +/// Metal buffer index for the view mask when rendering multi-view. +const uint32_t VIEW_MASK_BUFFER_INDEX = 24; + +class RenderingShaderContainerFormatMetal; + +/// @brief A minimal structure that defines a device profile for Metal. +/// +/// This structure is used by the `RenderingShaderContainerMetal` class to +/// determine options for compiling SPIR-V to Metal source. It currently only +/// contains the minimum properties required to transform shaders from SPIR-V to Metal +/// and potentially compile to a `.metallib`. +struct MetalDeviceProfile { + enum class Platform : uint32_t { + macOS = 0, + iOS = 1, + }; + + /// @brief The GPU family. + enum class GPU : uint32_t { + Apple1, + Apple2, + Apple3, + Apple4, + Apple5, + Apple6, + Apple7, + Apple8, + Apple9, + }; + + enum class ArgumentBuffersTier : uint32_t { + Tier1 = 0, + Tier2 = 1, + }; + + struct Features { + uint32_t mslVersionMajor = 0; + uint32_t mslVersionMinor = 0; + ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; + bool simdPermute = false; + }; + + Platform platform = Platform::macOS; + GPU gpu = GPU::Apple4; + Features features; + + static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu); + + MetalDeviceProfile() = default; + +private: + static Mutex profiles_lock; ///< Mutex to protect access to the profiles map. + static HashMap profiles; +}; + +class RenderingShaderContainerMetal : public RenderingShaderContainer { + GDSOFTCLASS(RenderingShaderContainerMetal, RenderingShaderContainer); + +public: + struct HeaderData { + enum Flags : uint32_t { + NONE = 0, + NEEDS_VIEW_MASK_BUFFER = 1 << 0, + USES_ARGUMENT_BUFFERS = 1 << 1, + }; + + /// The base profile that was used to generate this shader. + MetalDeviceProfile profile; + + /// The Metal language version specified when compiling SPIR-V to MSL. + /// Format is major * 10000 + minor * 100 + patch. + uint32_t msl_version = UINT32_MAX; + uint32_t flags = NONE; + + /// @brief Returns `true` if the shader is compiled with multi-view support. 
+ bool needs_view_mask_buffer() const { + return flags & NEEDS_VIEW_MASK_BUFFER; + } + + void set_needs_view_mask_buffer(bool p_value) { + if (p_value) { + flags |= NEEDS_VIEW_MASK_BUFFER; + } else { + flags &= ~NEEDS_VIEW_MASK_BUFFER; + } + } + + /// @brief Returns `true` if the shader was compiled with argument buffer support. + bool uses_argument_buffers() const { + return flags & USES_ARGUMENT_BUFFERS; + } + + void set_uses_argument_buffers(bool p_value) { + if (p_value) { + flags |= USES_ARGUMENT_BUFFERS; + } else { + flags &= ~USES_ARGUMENT_BUFFERS; + } + } + }; + + struct StageData { + uint32_t vertex_input_binding_mask = 0; + uint32_t is_position_invariant = 0; ///< true if the position output is invariant + uint32_t supports_fast_math = 0; + SHA256Digest hash; ///< SHA 256 hash of the shader code + uint32_t source_size = 0; ///< size of the source code in the returned bytes + uint32_t library_size = 0; ///< size of the compiled library in the returned bytes, 0 if it is not compiled + uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data + }; + + struct BindingInfoData { + uint32_t shader_stage = UINT32_MAX; ///< The shader stage this binding is used in, or UINT32_MAX if not used. + uint32_t data_type = 0; // MTLDataTypeNone + uint32_t index = 0; + uint32_t access = 0; // MTLBindingAccessReadOnly + uint32_t usage = 0; // MTLResourceUsage (none) + uint32_t texture_type = 2; // MTLTextureType2D + uint32_t image_format = 0; + uint32_t array_length = 0; + uint32_t is_multisampled = 0; + }; + + struct UniformData { + /// Specifies the index into the `bindings` array for the shader stage. + /// + /// For example, a vertex and fragment shader use slots 0 and 1 of the bindings and bindings_secondary arrays. + static constexpr uint32_t STAGE_INDEX[RenderingDeviceCommons::SHADER_STAGE_MAX] = { + 0, // SHADER_STAGE_VERTEX + 1, // SHADER_STAGE_FRAGMENT + 0, // SHADER_STAGE_TESSELATION_CONTROL + 1, // SHADER_STAGE_TESSELATION_EVALUATION + 0, // SHADER_STAGE_COMPUTE + }; + + /// Specifies the stages the uniform data is + /// used by the Metal shader. + uint32_t active_stages = 0; + /// The primary binding information for the uniform data. + /// + /// A maximum of two stages is expected for any given pipeline, such as a vertex and fragment, so + /// the array size is fixed to 2. + BindingInfoData bindings[2]; + /// The secondary binding information for the uniform data. 
+	///
+	/// This is typically a sampler for an image-sampler uniform.
+	BindingInfoData bindings_secondary[2];
+
+	_FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RenderingDeviceCommons::ShaderStage p_stage) const {
+		return STAGE_INDEX[p_stage];
+	}
+
+	_FORCE_INLINE_ BindingInfoData &get_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
+		BindingInfoData &info = bindings[get_index_for_stage(p_stage)];
+		DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
+		info.shader_stage = p_stage;
+		return info;
+	}
+
+	_FORCE_INLINE_ BindingInfoData &get_secondary_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
+		BindingInfoData &info = bindings_secondary[get_index_for_stage(p_stage)];
+		DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
+		info.shader_stage = p_stage;
+		return info;
+	}
+};
+
+struct SpecializationData {
+	uint32_t used_stages = 0;
+};
+
+HeaderData mtl_reflection_data; // complement to reflection_data
+Vector<StageData> mtl_shaders; // complement to shaders
+
+private:
+	const MetalDeviceProfile *device_profile = nullptr;
+	bool export_mode = false;
+
+	Vector<UniformData> mtl_reflection_binding_set_uniforms_data; // complement to reflection_binding_set_uniforms_data
+	Vector<SpecializationData> mtl_reflection_specialization_data; // complement to reflection_specialization_data
+
+	Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector<uint8_t> &r_binary_data);
+
+public:
+	static constexpr uint32_t FORMAT_VERSION = 1;
+
+	void set_export_mode(bool p_export_mode) { export_mode = p_export_mode; }
+	void set_device_profile(const MetalDeviceProfile *p_device_profile) { device_profile = p_device_profile; }
+
+	struct MetalShaderReflection {
+		Vector<Vector<UniformData>> uniform_sets;
+		Vector<SpecializationData> specialization_constants;
+	};
+
+	MetalShaderReflection get_metal_shader_reflection() const;
+
+protected:
+	virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+	virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+	virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes) override;
+	virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
+
+	virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override;
+	virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+	virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+	virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
+
+	virtual uint32_t _format() const override;
+	virtual uint32_t _format_version() const override;
+	virtual bool _set_code_from_spirv(const Vector &p_spirv) override;
+};
+
+class RenderingShaderContainerFormatMetal : public RenderingShaderContainerFormat {
+	bool export_mode = false;
+
+	const MetalDeviceProfile *device_profile = nullptr;
+
+public:
+	virtual Ref<RenderingShaderContainer> create_container() const 
override; + virtual ShaderLanguageVersion get_shader_language_version() const override; + virtual ShaderSpirvVersion get_shader_spirv_version() const override; + RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false); + virtual ~RenderingShaderContainerFormatMetal() = default; +}; diff --git a/drivers/metal/rendering_shader_container_metal.mm b/drivers/metal/rendering_shader_container_metal.mm new file mode 100644 index 0000000000..016866df70 --- /dev/null +++ b/drivers/metal/rendering_shader_container_metal.mm @@ -0,0 +1,701 @@ +/**************************************************************************/ +/* rendering_shader_container_metal.mm */ +/**************************************************************************/ +/* This file is part of: */ +/* REDOT ENGINE */ +/* https://redotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "rendering_shader_container_metal.h" + +#include "servers/rendering/rendering_device.h" + +#import "core/io/marshalls.h" + +#import +#import +#import +#import + +Mutex MetalDeviceProfile::profiles_lock; +HashMap MetalDeviceProfile::profiles; + +const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform p_platform, MetalDeviceProfile::GPU p_gpu) { + DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS); + + MutexLock lock(profiles_lock); + + uint32_t key = (uint32_t)p_platform << 16 | (uint32_t)p_gpu; + if (MetalDeviceProfile *profile = profiles.getptr(key)) { + return profile; + } + + MetalDeviceProfile res; + res.platform = p_platform; + res.gpu = p_gpu; + if (p_platform == Platform::macOS) { + res.features.mslVersionMajor = 3; + res.features.mslVersionMinor = 2; + res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2; + res.features.simdPermute = true; + } else if (p_platform == Platform::iOS) { + switch (p_gpu) { + case GPU::Apple1: + case GPU::Apple2: + case GPU::Apple3: + case GPU::Apple4: + case GPU::Apple5: { + res.features.simdPermute = false; + res.features.argument_buffers_tier = ArgumentBuffersTier::Tier1; + } break; + case GPU::Apple6: + case GPU::Apple7: + case GPU::Apple8: + case GPU::Apple9: { + res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2; + res.features.simdPermute = true; + } break; + } + res.features.mslVersionMajor = 3; + res.features.mslVersionMinor = 2; + } + + return &profiles.insert(key, res)->value; +} + +Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector &r_binary_data) { + String name(shader_name.ptr()); + if (name.contains_char(':')) { + name = name.replace_char(':', '_'); + } + Error r_error; + Ref source_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE, + name + "_" + itos(p_stage_data.hash.short_sha()), + "metal", false, &r_error); + ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary source file."); + if (!source_file->store_buffer((const uint8_t *)p_source, strlen(p_source))) { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unable to write temporary source file"); + } + source_file->flush(); + Ref result_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE, + name + "_" + itos(p_stage_data.hash.short_sha()), + "metallib", false, &r_error); + + ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary target file"); + + String sdk; + switch (device_profile->platform) { + case MetalDeviceProfile::Platform::macOS: + sdk = "macosx"; + break; + case MetalDeviceProfile::Platform::iOS: + sdk = "iphoneos"; + break; + } + + // Build the metallib binary. 
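+ // Invokes `/usr/bin/xcrun -sdk <sdk> metal -O3 <source> -o <result>` and checks both the process exit code and that the produced library is non-empty.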
+ { + List args{ "-sdk", sdk, "metal", "-O3" }; + if (p_stage_data.is_position_invariant) { + args.push_back("-fpreserve-invariance"); + } + args.push_back("-fmetal-math-mode=fast"); + args.push_back(source_file->get_path_absolute()); + args.push_back("-o"); + args.push_back(result_file->get_path_absolute()); + String r_pipe; + int exit_code; + Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true); + if (!r_pipe.is_empty()) { + print_line(r_pipe); + } + if (err != OK) { + ERR_PRINT(vformat("Metal compiler returned error code: %d", err)); + } + + if (exit_code != 0) { + ERR_PRINT(vformat("Metal compiler exited with error code: %d", exit_code)); + } + int len = result_file->get_length(); + ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "Metal compiler created empty library"); + } + + // Strip the source from the binary. + { + List args{ "-sdk", sdk, "metal-dsymutil", "--remove-source", result_file->get_path_absolute() }; + String r_pipe; + int exit_code; + Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true); + if (!r_pipe.is_empty()) { + print_line(r_pipe); + } + if (err != OK) { + ERR_PRINT(vformat("metal-dsymutil tool returned error code: %d", err)); + } + + if (exit_code != 0) { + ERR_PRINT(vformat("metal-dsymutil Compiler exited with error code: %d", exit_code)); + } + int len = result_file->get_length(); + ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "metal-dsymutil tool created empty library"); + } + + r_binary_data = result_file->get_buffer(result_file->get_length()); + + return OK; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunguarded-availability" + +bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector &p_spirv) { + using namespace spirv_cross; + using spirv_cross::CompilerMSL; + using spirv_cross::Resource; + + // initialize Metal-specific reflection data + shaders.resize(p_spirv.size()); + mtl_shaders.resize(p_spirv.size()); + mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size()); + mtl_reflection_specialization_data.resize(reflection_specialization_data.size()); + + mtl_reflection_data.set_needs_view_mask_buffer(reflection_data.has_multiview); + + // set_indexes will contain the starting offsets of each descriptor set in the binding set uniforms data + // including the last one, which is the size of reflection_binding_set_uniforms_count. + LocalVector set_indexes; + uint32_t set_indexes_size = reflection_binding_set_uniforms_count.size() + 1; + { + // calculate the starting offsets of each descriptor set in the binding set uniforms data + uint32_t size = reflection_binding_set_uniforms_count.size(); + set_indexes.resize(set_indexes_size); + uint32_t offset = 0; + for (uint32_t i = 0; i < size; i++) { + set_indexes[i] = offset; + offset += reflection_binding_set_uniforms_count.get(i); + } + set_indexes[set_indexes_size - 1] = offset; + } + CompilerMSL::Options msl_options{}; + msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor); + mtl_reflection_data.msl_version = msl_options.msl_version; + msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? 
CompilerMSL::Options::macOS : CompilerMSL::Options::iOS; + + if (device_profile->platform == MetalDeviceProfile::Platform::iOS) { + msl_options.ios_use_simdgroup_functions = device_profile->features.simdPermute; + msl_options.ios_support_base_vertex_instance = true; + } + + bool disable_argument_buffers = false; + if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") { + disable_argument_buffers = true; + } + + if (device_profile->features.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && !disable_argument_buffers) { + msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; + msl_options.argument_buffers = true; + mtl_reflection_data.set_uses_argument_buffers(true); + } else { + msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1; + // Tier 1 argument buffers don't support writable textures, so we disable them completely. + msl_options.argument_buffers = false; + mtl_reflection_data.set_uses_argument_buffers(false); + } + msl_options.force_active_argument_buffer_resources = true; + // We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding. + // msl_options.pad_argument_buffer_resources = true; + msl_options.texture_buffer_native = true; // Enable texture buffer support. + msl_options.use_framebuffer_fetch_subpasses = false; + msl_options.pad_fragment_output_components = true; + msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID; + msl_options.agx_manual_cube_grad_fixup = true; + if (reflection_data.has_multiview) { + msl_options.multiview = true; + msl_options.multiview_layered_rendering = true; + msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX; + } + + CompilerGLSL::Options options{}; + options.vertex.flip_vert_y = true; +#if DEV_ENABLED + options.emit_line_directives = true; +#endif + + for (uint32_t i = 0; i < p_spirv.size(); i++) { + StageData &stage_data = mtl_shaders.write[i]; + RD::ShaderStageSPIRVData const &v = p_spirv[i]; + RD::ShaderStage stage = v.shader_stage; + char const *stage_name = RD::SHADER_STAGE_NAMES[stage]; + uint32_t const *const ir = reinterpret_cast(v.spirv.ptr()); + size_t word_count = v.spirv.size() / sizeof(uint32_t); + Parser parser(ir, word_count); + try { + parser.parse(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(false, "Failed to parse IR at stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); + } + + CompilerMSL compiler(std::move(parser.get_parsed_ir())); + compiler.set_msl_options(msl_options); + compiler.set_common_options(options); + + std::unordered_set active = compiler.get_active_interface_variables(); + ShaderResources resources = compiler.get_shader_resources(); + + std::string source; + try { + source = compiler.compile(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(false, "Failed to compile stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); + } + + ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, false, "Expected a single entry point and stage."); + + SmallVector entry_pts_stages = compiler.get_entry_points_and_stages(); + EntryPoint &entry_point_stage = entry_pts_stages.front(); + SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); + + // Process specialization constants. 
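+ // Match each constant reported by SPIRV-Cross against the reflected constant_id list and record which stages use it.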
+ if (!compiler.get_specialization_constants().empty()) { + uint32_t size = reflection_specialization_data.size(); + for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { + uint32_t j = 0; + while (j < size) { + const ReflectionSpecializationData &res = reflection_specialization_data.ptr()[j]; + if (res.constant_id == constant.constant_id) { + mtl_reflection_specialization_data.ptrw()[j].used_stages |= 1 << stage; + // emulate labeled for loop and continue + goto outer_continue; + } + ++j; + } + if (j == size) { + WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id)); + } + outer_continue:; + } + } + + // Process bindings. + uint32_t uniform_sets_size = reflection_binding_set_uniforms_count.size(); + using BT = SPIRType::BaseType; + + // Always clearer than a boolean. + enum class Writable { + No, + Maybe, + }; + + // Returns a std::optional containing the value of the + // decoration, if it exists. + auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { + uint32_t res = -1; + if (compiler.has_decoration(id, decoration)) { + res = compiler.get_decoration(id, decoration); + } + return res; + }; + + auto descriptor_bindings = [&compiler, &active, this, &set_indexes, uniform_sets_size, stage, &get_decoration](SmallVector &p_resources, Writable p_writable) { + for (Resource const &res : p_resources) { + uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); + uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); + UniformData *found = nullptr; + if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets_size) { + uint32_t begin = set_indexes[dset]; + uint32_t end = set_indexes[dset + 1]; + for (uint32_t j = begin; j < end; j++) { + const ReflectionBindingData &ref_bind = reflection_binding_set_uniforms_data[j]; + if (dbin == ref_bind.binding) { + found = &mtl_reflection_binding_set_uniforms_data.write[j]; + break; + } + } + } + + ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found"); + + bool is_active = active.find(res.id) != active.end(); + if (is_active) { + found->active_stages |= 1 << stage; + } + + BindingInfoData &primary = found->get_binding_for_stage(stage); + + SPIRType const &a_type = compiler.get_type(res.type_id); + BT basetype = a_type.basetype; + + switch (basetype) { + case BT::Struct: { + primary.data_type = MTLDataTypePointer; + } break; + + case BT::Image: + case BT::SampledImage: { + primary.data_type = MTLDataTypeTexture; + } break; + + case BT::Sampler: { + primary.data_type = MTLDataTypeSampler; + primary.array_length = 1; + for (uint32_t const &a : a_type.array) { + primary.array_length *= a; + } + } break; + + default: { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType"); + } break; + } + + // Find array length of image. 
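+ // Also captures multisampling, the image format, and the Metal texture type implied by the SPIR-V dimensionality.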
+ if (basetype == BT::Image || basetype == BT::SampledImage) { + primary.array_length = 1; + for (uint32_t const &a : a_type.array) { + primary.array_length *= a; + } + primary.is_multisampled = a_type.image.ms; + + SPIRType::ImageType const &image = a_type.image; + primary.image_format = image.format; + + switch (image.dim) { + case spv::Dim1D: { + if (image.arrayed) { + primary.texture_type = MTLTextureType1DArray; + } else { + primary.texture_type = MTLTextureType1D; + } + } break; + case spv::DimSubpassData: { + [[fallthrough]]; + } + case spv::Dim2D: { + if (image.arrayed && image.ms) { + primary.texture_type = MTLTextureType2DMultisampleArray; + } else if (image.arrayed) { + primary.texture_type = MTLTextureType2DArray; + } else if (image.ms) { + primary.texture_type = MTLTextureType2DMultisample; + } else { + primary.texture_type = MTLTextureType2D; + } + } break; + case spv::Dim3D: { + primary.texture_type = MTLTextureType3D; + } break; + case spv::DimCube: { + if (image.arrayed) { + primary.texture_type = MTLTextureTypeCubeArray; + } else { + primary.texture_type = MTLTextureTypeCube; + } + } break; + case spv::DimRect: { + } break; + case spv::DimBuffer: { + // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER + primary.texture_type = MTLTextureTypeTextureBuffer; + } break; + case spv::DimMax: { + // Add all enumerations to silence the compiler warning + // and generate future warnings, should a new one be added. + } break; + } + } + + // Update writable. + if (p_writable == Writable::Maybe) { + if (basetype == BT::Struct) { + Bitset flags = compiler.get_buffer_block_flags(res.id); + if (!flags.get(spv::DecorationNonWritable)) { + if (flags.get(spv::DecorationNonReadable)) { + primary.access = MTLBindingAccessWriteOnly; + } else { + primary.access = MTLBindingAccessReadWrite; + } + } + } else if (basetype == BT::Image) { + switch (a_type.image.access) { + case spv::AccessQualifierWriteOnly: + primary.access = MTLBindingAccessWriteOnly; + break; + case spv::AccessQualifierReadWrite: + primary.access = MTLBindingAccessReadWrite; + break; + case spv::AccessQualifierReadOnly: + break; + case spv::AccessQualifierMax: + [[fallthrough]]; + default: + if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) { + if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) { + primary.access = MTLBindingAccessWriteOnly; + } else { + primary.access = MTLBindingAccessReadWrite; + } + } + break; + } + } + } + + switch (primary.access) { + case MTLBindingAccessReadOnly: + primary.usage = MTLResourceUsageRead; + break; + case MTLBindingAccessWriteOnly: + primary.usage = MTLResourceUsageWrite; + break; + case MTLBindingAccessReadWrite: + primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite; + break; + } + + primary.index = compiler.get_automatic_msl_resource_binding(res.id); + + // A sampled image contains two bindings, the primary + // is to the image, and the secondary is to the associated sampler. + if (basetype == BT::SampledImage) { + uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); + if (binding != (uint32_t)-1) { + BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage); + secondary.data_type = MTLDataTypeSampler; + secondary.index = binding; + secondary.access = MTLBindingAccessReadOnly; + } + } + + // An image may have a secondary binding if it is used + // for atomic operations.
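+ // SPIRV-Cross reports the auxiliary atomic buffer as a secondary binding; record it as a read-write pointer.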
+ if (basetype == BT::Image) { + uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); + if (binding != (uint32_t)-1) { + BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage); + secondary.data_type = MTLDataTypePointer; + secondary.index = binding; + secondary.access = MTLBindingAccessReadWrite; + } + } + } + return Error::OK; + }; + + if (!resources.uniform_buffers.empty()) { + Error err = descriptor_bindings(resources.uniform_buffers, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.storage_buffers.empty()) { + Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.storage_images.empty()) { + Error err = descriptor_bindings(resources.storage_images, Writable::Maybe); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.sampled_images.empty()) { + Error err = descriptor_bindings(resources.sampled_images, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.separate_images.empty()) { + Error err = descriptor_bindings(resources.separate_images, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.separate_samplers.empty()) { + Error err = descriptor_bindings(resources.separate_samplers, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.subpass_inputs.empty()) { + Error err = descriptor_bindings(resources.subpass_inputs, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + + if (!resources.push_constant_buffers.empty()) { + for (Resource const &res : resources.push_constant_buffers) { + uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); + if (binding != (uint32_t)-1) { + stage_data.push_constant_binding = binding; + } + } + } + + ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), false, "Atomic counters not supported"); + ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), false, "Acceleration structures not supported"); + ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), false, "Shader record buffers not supported"); + + if (!resources.stage_inputs.empty()) { + for (Resource const &res : resources.stage_inputs) { + uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); + if (binding != (uint32_t)-1) { + stage_data.vertex_input_binding_mask |= 1 << binding; + } + } + } + + stage_data.is_position_invariant = compiler.is_position_invariant(); + stage_data.supports_fast_math = !entry_point.flags.get(spv::ExecutionModeSignedZeroInfNanPreserve); + stage_data.hash = SHA256Digest(source.c_str(), source.length()); + stage_data.source_size = source.length(); + ::Vector binary_data; + binary_data.resize(stage_data.source_size); + memcpy(binary_data.ptrw(), source.c_str(), stage_data.source_size); + + if (export_mode) { + // Try to compile the Metal source code + ::Vector library_data; + Error compile_err = compile_metal_source(source.c_str(), stage_data, library_data); + if (compile_err == OK) { + stage_data.library_size = library_data.size(); + binary_data.resize(stage_data.source_size + stage_data.library_size); + memcpy(binary_data.ptrw() + stage_data.source_size, library_data.ptr(), stage_data.library_size); + } + } + + uint32_t binary_data_size = binary_data.size(); + Shader &shader = shaders.write[i]; + shader.shader_stage = stage; + shader.code_decompressed_size = binary_data_size; + shader.code_compressed_bytes.resize(binary_data_size); + + uint32_t compressed_size = 0; + bool compressed = 
compress_code(binary_data.ptr(), binary_data_size, shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags); + ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i)); + + shader.code_compressed_bytes.resize(compressed_size); + } + + return true; +} + +#pragma clang diagnostic pop + +uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const { + if (p_bytes != nullptr) { + *(HeaderData *)p_bytes = mtl_reflection_data; + } + return sizeof(HeaderData); +} + +uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(UniformData *)p_bytes = mtl_reflection_binding_set_uniforms_data[p_index]; + } + return sizeof(UniformData); +} + +uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(SpecializationData *)p_bytes = mtl_reflection_specialization_data[p_index]; + } + return sizeof(SpecializationData); +} + +uint32_t RenderingShaderContainerMetal::_to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(StageData *)p_bytes = mtl_shaders[p_index]; + } + return sizeof(StageData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) { + mtl_reflection_data = *(HeaderData *)p_bytes; + return sizeof(HeaderData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) { + mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + mtl_reflection_binding_set_uniforms_data.ptrw()[p_index] = *(UniformData *)p_bytes; + return sizeof(UniformData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) { + mtl_reflection_specialization_data.resize(reflection_specialization_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + mtl_reflection_specialization_data.ptrw()[p_index] = *(SpecializationData *)p_bytes; + return sizeof(SpecializationData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) { + mtl_shaders.resize(shaders.size()); + return 0; +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + mtl_shaders.ptrw()[p_index] = *(StageData *)p_bytes; + return sizeof(StageData); +} + +RenderingShaderContainerMetal::MetalShaderReflection RenderingShaderContainerMetal::get_metal_shader_reflection() const { + MetalShaderReflection res; + + res.specialization_constants = mtl_reflection_specialization_data; + uint32_t uniform_set_count = reflection_binding_set_uniforms_count.size(); + uint32_t start = 0; + res.uniform_sets.resize(uniform_set_count); + for (uint32_t i = 0; i < uniform_set_count; i++) { + Vector &set = res.uniform_sets.ptrw()[i]; + uint32_t count = reflection_binding_set_uniforms_count.get(i); + set.resize(count); + memcpy(set.ptrw(), &mtl_reflection_binding_set_uniforms_data.ptr()[start], count * sizeof(UniformData)); 
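+ // Uniform data for all sets is stored contiguously; `start` marks where the next set begins.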
+ start += count; + } + + return res; +} + +uint32_t RenderingShaderContainerMetal::_format() const { + return 0x42424242; +} + +uint32_t RenderingShaderContainerMetal::_format_version() const { + return FORMAT_VERSION; +} + +Ref RenderingShaderContainerFormatMetal::create_container() const { + Ref result; + result.instantiate(); + result->set_export_mode(export_mode); + result->set_device_profile(device_profile); + return result; +} + +RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatMetal::get_shader_language_version() const { + return SHADER_LANGUAGE_VULKAN_VERSION_1_1; +} + +RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatMetal::get_shader_spirv_version() const { + return SHADER_SPIRV_VERSION_1_6; +} + +RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export) : + export_mode(p_export), device_profile(p_device_profile) { +} diff --git a/drivers/metal/sha256_digest.h b/drivers/metal/sha256_digest.h new file mode 100644 index 0000000000..ba36a61e01 --- /dev/null +++ b/drivers/metal/sha256_digest.h @@ -0,0 +1,77 @@ +/**************************************************************************/ +/* sha256_digest.h */ +/**************************************************************************/ +/* This file is part of: */ +/* REDOT ENGINE */ +/* https://redotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#import +#import +#import + +#include "core/templates/local_vector.h" + +struct SHA256Digest { + unsigned char data[CC_SHA256_DIGEST_LENGTH]; + + static constexpr size_t serialized_size() { return CC_SHA256_DIGEST_LENGTH; } + + uint32_t hash() const { + uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH); + return c; + } + + SHA256Digest() { + bzero(data, CC_SHA256_DIGEST_LENGTH); + } + + SHA256Digest(const char *p_hash) { + memcpy(data, p_hash, CC_SHA256_DIGEST_LENGTH); + } + + SHA256Digest(const char *p_data, size_t p_length) { + CC_SHA256(p_data, (CC_LONG)p_length, data); + } + + _FORCE_INLINE_ uint32_t short_sha() const { + return __builtin_bswap32(*(uint32_t *)&data[0]); + } + + LocalVector serialize() const { + LocalVector result; + result.resize(CC_SHA256_DIGEST_LENGTH); + memcpy(result.ptr(), data, CC_SHA256_DIGEST_LENGTH); + return result; + } + + static SHA256Digest deserialize(LocalVector p_ser) { + return SHA256Digest((const char *)p_ser.ptr()); + } +}; diff --git a/drivers/png/png_driver_common.cpp b/drivers/png/png_driver_common.cpp index 6c04e68f9d..a42de0031d 100644 --- a/drivers/png/png_driver_common.cpp +++ b/drivers/png/png_driver_common.cpp @@ -346,7 +346,7 @@ Error apng_to_image_frames(const uint8_t *p_source, size_t p_size, bool p_force_ }; Vector screen; - screen.resize_zeroed(width * height * pixel_size); + screen.resize_initialized(width * height * pixel_size); if (is_animated) { // Skip first frame if (is_first_frame_hidden) { @@ -380,7 +380,7 @@ Error apng_to_image_frames(const uint8_t *p_source, size_t p_size, bool p_force_ frame.delay = float(delay_num) / float(delay_den == 0 ? 100.0 : delay_den); - frame.buffer.resize_zeroed(frame.width * frame.height * pixel_size); + frame.buffer.resize_initialized(frame.width * frame.height * pixel_size); read_image(frame.buffer, frame.width, frame.height); return frame; }; diff --git a/drivers/unix/dir_access_unix.cpp b/drivers/unix/dir_access_unix.cpp index e33c34629c..3a90066019 100644 --- a/drivers/unix/dir_access_unix.cpp +++ b/drivers/unix/dir_access_unix.cpp @@ -42,6 +42,9 @@ #include #include #include +#ifdef __linux__ +#include +#endif #include #include #include @@ -518,7 +521,166 @@ uint64_t DirAccessUnix::get_space_left() { } String DirAccessUnix::get_filesystem_type() const { +#ifdef __linux__ + struct statfs fs; + if (statfs(current_dir.utf8().get_data(), &fs) != 0) { + return ""; + } + switch (fs.f_type) { + case 0x0000adf5: + return "ADFS"; + case 0x0000adff: + return "AFFS"; + case 0x5346414f: + return "AFS"; + case 0x00000187: + return "AUTOFS"; + case 0x00c36400: + return "CEPH"; + case 0x73757245: + return "CODA"; + case 0x28cd3d45: + return "CRAMFS"; + case 0x453dcd28: + return "CRAMFS"; + case 0x64626720: + return "DEBUGFS"; + case 0x73636673: + return "SECURITYFS"; + case 0xf97cff8c: + return "SELINUX"; + case 0x43415d53: + return "SMACK"; + case 0x858458f6: + return "RAMFS"; + case 0x01021994: + return "TMPFS"; + case 0x958458f6: + return "HUGETLBFS"; + case 0x73717368: + return "SQUASHFS"; + case 0x0000f15f: + return "ECRYPTFS"; + case 0x00414a53: + return "EFS"; + case 0xe0f5e1e2: + return "EROFS"; + case 0x0000ef53: + return "EXTFS"; + case 0xabba1974: + return "XENFS"; + case 0x9123683e: + return "BTRFS"; + case 0x00003434: + return "NILFS"; + case 0xf2f52010: + return "F2FS"; + case 0xf995e849: + return "HPFS"; + case 0x00009660: + return "ISOFS"; + case 0x000072b6: + return "JFFS2"; + case 
0x58465342: + return "XFS"; + case 0x6165676c: + return "PSTOREFS"; + case 0xde5e81e4: + return "EFIVARFS"; + case 0x00c0ffee: + return "HOSTFS"; + case 0x794c7630: + return "OVERLAYFS"; + case 0x65735546: + return "FUSE"; + case 0xca451a4e: + return "BCACHEFS"; + case 0x00004d44: + return "FAT32"; + case 0x2011bab0: + return "EXFAT"; + case 0x0000564c: + return "NCP"; + case 0x00006969: + return "NFS"; + case 0x7461636f: + return "OCFS2"; + case 0x00009fa1: + return "OPENPROM"; + case 0x0000002f: + return "QNX4"; + case 0x68191122: + return "QNX6"; + case 0x6b414653: + return "AFS"; + case 0x52654973: + return "REISERFS"; + case 0x0000517b: + return "SMB"; + case 0xff534d42: + return "CIFS"; + case 0x0027e0eb: + return "CGROUP"; + case 0x63677270: + return "CGROUP2"; + case 0x07655821: + return "RDTGROUP"; + case 0x74726163: + return "TRACEFS"; + case 0x01021997: + return "V9FS"; + case 0x62646576: + return "BDEVFS"; + case 0x64646178: + return "DAXFS"; + case 0x42494e4d: + return "BINFMTFS"; + case 0x00001cd1: + return "DEVPTS"; + case 0x6c6f6f70: + return "BINDERFS"; + case 0x0bad1dea: + return "FUTEXFS"; + case 0x50495045: + return "PIPEFS"; + case 0x00009fa0: + return "PROC"; + case 0x534f434b: + return "SOCKFS"; + case 0x62656572: + return "SYSFS"; + case 0x00009fa2: + return "USBDEVICE"; + case 0x11307854: + return "MTD_INODE"; + case 0x09041934: + return "ANON_INODE"; + case 0x73727279: + return "BTRFS"; + case 0x6e736673: + return "NSFS"; + case 0xcafe4a11: + return "BPF_FS"; + case 0x5a3c69f0: + return "AAFS"; + case 0x5a4f4653: + return "ZONEFS"; + case 0x15013346: + return "UDF"; + case 0x444d4142: + return "DMA_BUF"; + case 0x454d444d: + return "DEVMEM"; + case 0x5345434d: + return "SECRETMEM"; + case 0x50494446: + return "PID_FS"; + default: + return ""; + } +#else return ""; //TODO this should be implemented +#endif } bool DirAccessUnix::is_hidden(const String &p_name) { diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index d67931d1e2..35b70fee22 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -34,9 +34,12 @@ #include "core/config/project_settings.h" #include "core/io/marshalls.h" -#include "thirdparty/misc/smolv.h" #include "vulkan_hooks.h" +#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV +#include "thirdparty/misc/smolv.h" +#endif + #if defined(ANDROID_ENABLED) #include "platform/android/java_godot_wrapper.h" #include "platform/android/os_android.h" @@ -3552,260 +3555,34 @@ static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_ VK_SHADER_STAGE_COMPUTE_BIT, }; -String RenderingDeviceDriverVulkan::shader_get_binary_cache_key() { - return "Vulkan-SV" + uitos(ShaderBinary::VERSION); -} - -Vector RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) { - ShaderReflection shader_refl; - if (_reflect_spirv(p_spirv, shader_refl) != OK) { - return Vector(); - } - - ERR_FAIL_COND_V_MSG((uint32_t)shader_refl.uniform_sets.size() > physical_device_properties.limits.maxBoundDescriptorSets, Vector(), - "Number of uniform sets is larger than what is supported by the hardware (" + itos(physical_device_properties.limits.maxBoundDescriptorSets) + ")."); - - // Collect reflection data into binary data. - ShaderBinary::Data binary_data; - Vector> uniforms; // Set bindings. 
- Vector specialization_constants; - { - binary_data.vertex_input_mask = shader_refl.vertex_input_mask; - binary_data.fragment_output_mask = shader_refl.fragment_output_mask; - binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; - binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; - binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; - binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; - binary_data.set_count = shader_refl.uniform_sets.size(); - binary_data.push_constant_size = shader_refl.push_constant_size; - for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) { - if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) { - binary_data.vk_push_constant_stages_mask |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i]; - } - } - - for (const Vector &set_refl : shader_refl.uniform_sets) { - Vector set_bindings; - for (const ShaderUniform &uniform_refl : set_refl) { - ShaderBinary::DataBinding binding; - binding.type = (uint32_t)uniform_refl.type; - binding.binding = uniform_refl.binding; - binding.stages = (uint32_t)uniform_refl.stages; - binding.length = uniform_refl.length; - binding.writable = (uint32_t)uniform_refl.writable; - set_bindings.push_back(binding); - } - uniforms.push_back(set_bindings); - } - - for (const ShaderSpecializationConstant &refl_sc : shader_refl.specialization_constants) { - ShaderBinary::SpecializationConstant spec_constant; - spec_constant.type = (uint32_t)refl_sc.type; - spec_constant.constant_id = refl_sc.constant_id; - spec_constant.int_value = refl_sc.int_value; - spec_constant.stage_flags = (uint32_t)refl_sc.stages; - specialization_constants.push_back(spec_constant); +RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { + ShaderReflection shader_refl = p_shader_container->get_shader_reflection(); + ShaderInfo shader_info; + for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) { + if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) { + shader_info.vk_push_constant_stages |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i]; } } - Vector> compressed_stages; - Vector smolv_size; - Vector zstd_size; // If 0, zstd not used. - - uint32_t stages_binary_size = 0; - - bool strip_debug = false; - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - smolv::ByteArray smolv; - if (!smolv::Encode(p_spirv[i].spirv.ptr(), p_spirv[i].spirv.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) { - ERR_FAIL_V_MSG(Vector(), "Error compressing shader stage :" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage])); - } else { - smolv_size.push_back(smolv.size()); - { // zstd. - Vector zstd; - zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD)); - int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD); - - if (dst_size > 0 && (uint32_t)dst_size < smolv.size()) { - zstd_size.push_back(dst_size); - zstd.resize(dst_size); - compressed_stages.push_back(zstd); - } else { - Vector smv; - smv.resize(smolv.size()); - memcpy(smv.ptrw(), &smolv[0], smolv.size()); - zstd_size.push_back(0); // Not using zstd. 
- compressed_stages.push_back(smv); - } - } - } - uint32_t s = compressed_stages[i].size(); - stages_binary_size += STEPIFY(s, 4); - } - - binary_data.specialization_constants_count = specialization_constants.size(); - binary_data.set_count = uniforms.size(); - binary_data.stage_count = p_spirv.size(); - - CharString shader_name_utf = p_shader_name.utf8(); - - binary_data.shader_name_len = shader_name_utf.length(); - - uint32_t total_size = sizeof(uint32_t) * 4; // Header + version + pad + main datasize;. - total_size += sizeof(ShaderBinary::Data); - - total_size += STEPIFY(binary_data.shader_name_len, 4); - - for (int i = 0; i < uniforms.size(); i++) { - total_size += sizeof(uint32_t); - total_size += uniforms[i].size() * sizeof(ShaderBinary::DataBinding); - } - - total_size += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - - total_size += compressed_stages.size() * sizeof(uint32_t) * 3; // Sizes. - total_size += stages_binary_size; - - Vector ret; - ret.resize(total_size); - { - uint32_t offset = 0; - uint8_t *binptr = ret.ptrw(); - binptr[0] = 'G'; - binptr[1] = 'S'; - binptr[2] = 'B'; - binptr[3] = 'D'; // Redot Shader Binary Data. - offset += 4; - encode_uint32(ShaderBinary::VERSION, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(sizeof(ShaderBinary::Data), binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(0, binptr + offset); // Pad to align ShaderBinary::Data to 8 bytes. - offset += sizeof(uint32_t); - memcpy(binptr + offset, &binary_data, sizeof(ShaderBinary::Data)); - offset += sizeof(ShaderBinary::Data); - -#define ADVANCE_OFFSET_WITH_ALIGNMENT(m_bytes) \ - { \ - offset += m_bytes; \ - uint32_t padding = STEPIFY(m_bytes, 4) - m_bytes; \ - memset(binptr + offset, 0, padding); /* Avoid garbage data. */ \ - offset += padding; \ - } - - if (binary_data.shader_name_len > 0) { - memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len); - ADVANCE_OFFSET_WITH_ALIGNMENT(binary_data.shader_name_len); - } - - for (int i = 0; i < uniforms.size(); i++) { - int count = uniforms[i].size(); - encode_uint32(count, binptr + offset); - offset += sizeof(uint32_t); - if (count > 0) { - memcpy(binptr + offset, uniforms[i].ptr(), sizeof(ShaderBinary::DataBinding) * count); - offset += sizeof(ShaderBinary::DataBinding) * count; - } - } - - if (specialization_constants.size()) { - memcpy(binptr + offset, specialization_constants.ptr(), sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size()); - offset += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - } - - for (int i = 0; i < compressed_stages.size(); i++) { - encode_uint32(p_spirv[i].shader_stage, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(smolv_size[i], binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(zstd_size[i], binptr + offset); - offset += sizeof(uint32_t); - memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size()); - ADVANCE_OFFSET_WITH_ALIGNMENT(compressed_stages[i].size()); - } - - DEV_ASSERT(offset == (uint32_t)ret.size()); - } - - return ret; -} - -RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) { - r_shader_desc = {}; // Driver-agnostic. - ShaderInfo shader_info; // Driver-specific. 
- - const uint8_t *binptr = p_shader_binary.ptr(); - uint32_t binsize = p_shader_binary.size(); - - uint32_t read_offset = 0; - - // Consistency check. - ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 4 + sizeof(ShaderBinary::Data), ShaderID()); - ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', ShaderID()); - - uint32_t bin_version = decode_uint32(binptr + 4); - ERR_FAIL_COND_V(bin_version != ShaderBinary::VERSION, ShaderID()); - - uint32_t bin_data_size = decode_uint32(binptr + 8); - - // 16, not 12, to skip alignment padding. - const ShaderBinary::Data &binary_data = *(reinterpret_cast(binptr + 16)); - - r_shader_desc.push_constant_size = binary_data.push_constant_size; - shader_info.vk_push_constant_stages = binary_data.vk_push_constant_stages_mask; - - r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; - r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - - r_shader_desc.is_compute = binary_data.is_compute; - r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; - r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; - r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; - - read_offset += sizeof(uint32_t) * 4 + bin_data_size; - - if (binary_data.shader_name_len) { - r_name.clear(); - r_name.append_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len); - read_offset += STEPIFY(binary_data.shader_name_len, 4); - } - + // Set bindings. Vector> vk_set_bindings; - - r_shader_desc.uniform_sets.resize(binary_data.set_count); - vk_set_bindings.resize(binary_data.set_count); - - for (uint32_t i = 0; i < binary_data.set_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, ShaderID()); - uint32_t set_count = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - const ShaderBinary::DataBinding *set_ptr = reinterpret_cast(binptr + read_offset); - uint32_t set_size = set_count * sizeof(ShaderBinary::DataBinding); - ERR_FAIL_COND_V(read_offset + set_size >= binsize, ShaderID()); - - for (uint32_t j = 0; j < set_count; j++) { - ShaderUniform info; - info.type = UniformType(set_ptr[j].type); - info.writable = set_ptr[j].writable; - info.length = set_ptr[j].length; - info.binding = set_ptr[j].binding; - info.stages = set_ptr[j].stages; - + vk_set_bindings.resize(shader_refl.uniform_sets.size()); + for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) { + for (uint32_t j = 0; j < shader_refl.uniform_sets[i].size(); j++) { + const ShaderUniform &uniform = shader_refl.uniform_sets[i][j]; VkDescriptorSetLayoutBinding layout_binding = {}; - layout_binding.binding = set_ptr[j].binding; + layout_binding.binding = uniform.binding; layout_binding.descriptorCount = 1; for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) { - if ((set_ptr[j].stages & (1 << k))) { + if ((uniform.stages.has_flag(ShaderStage(1U << k)))) { layout_binding.stageFlags |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[k]; } } - switch (info.type) { + switch (uniform.type) { case UNIFORM_TYPE_SAMPLER: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; // Immutable samplers: here they get set in the layoutbinding, given that they will not be changed later. 
int immutable_bind_index = -1; if (immutable_samplers_enabled && p_immutable_samplers.size() > 0) { @@ -3822,19 +3599,19 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec } break; case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_TEXTURE: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_IMAGE: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_TEXTURE_BUFFER: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_IMAGE_BUFFER: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; @@ -3853,104 +3630,72 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec } } - r_shader_desc.uniform_sets.write[i].push_back(info); vk_set_bindings.write[i].push_back(layout_binding); } - - read_offset += set_size; } - ERR_FAIL_COND_V(read_offset + binary_data.specialization_constants_count * sizeof(ShaderBinary::SpecializationConstant) >= binsize, ShaderID()); - - r_shader_desc.specialization_constants.resize(binary_data.specialization_constants_count); - for (uint32_t i = 0; i < binary_data.specialization_constants_count; i++) { - const ShaderBinary::SpecializationConstant &src_sc = *(reinterpret_cast(binptr + read_offset)); - ShaderSpecializationConstant sc; - sc.type = PipelineSpecializationConstantType(src_sc.type); - sc.constant_id = src_sc.constant_id; - sc.int_value = src_sc.int_value; - sc.stages = src_sc.stage_flags; - r_shader_desc.specialization_constants.write[i] = sc; - - read_offset += sizeof(ShaderBinary::SpecializationConstant); - } - - Vector> stages_spirv; - stages_spirv.resize(binary_data.stage_count); - r_shader_desc.stages.resize(binary_data.stage_count); - - for (uint32_t i = 0; i < binary_data.stage_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID()); - - uint32_t stage = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t smolv_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t zstd_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - - uint32_t buf_size = (zstd_size > 0) ? zstd_size : smolv_size; - - Vector smolv; - const uint8_t *src_smolv = nullptr; - - if (zstd_size > 0) { - // Decompress to smolv. 
- smolv.resize(smolv_size); - int dec_smolv_size = Compression::decompress(smolv.ptrw(), smolv.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD); - ERR_FAIL_COND_V(dec_smolv_size != (int32_t)smolv_size, ShaderID()); - src_smolv = smolv.ptr(); - } else { - src_smolv = binptr + read_offset; - } - - Vector &spirv = stages_spirv.ptrw()[i]; - uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size); - spirv.resize(spirv_size); - if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) { - ERR_FAIL_V_MSG(ShaderID(), "Malformed smolv input uncompressing shader stage:" + String(SHADER_STAGE_NAMES[stage])); - } - - r_shader_desc.stages.set(i, ShaderStage(stage)); - - buf_size = STEPIFY(buf_size, 4); - read_offset += buf_size; - ERR_FAIL_COND_V(read_offset > binsize, ShaderID()); - } - - ERR_FAIL_COND_V(read_offset != binsize, ShaderID()); - // Modules. - + VkResult res; String error_text; + Vector decompressed_code; + Vector decoded_spirv; + VkShaderModule vk_module; + for (int i = 0; i < shader_refl.stages_vector.size(); i++) { + const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i]; +#if RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION + bool requires_decompression = (shader.code_decompressed_size > 0); + if (requires_decompression) { + decompressed_code.resize(shader.code_decompressed_size); + bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size()); + if (!decompressed) { + error_text = vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); + break; + } + } +#else + bool requires_decompression = false; +#endif + + const uint8_t *smolv_input = requires_decompression ? decompressed_code.ptr() : shader.code_compressed_bytes.ptr(); + uint32_t smolv_input_size = requires_decompression ? 
decompressed_code.size() : shader.code_compressed_bytes.size(); +#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV + decoded_spirv.resize(smolv::GetDecodedBufferSize(smolv_input, smolv_input_size)); + if (decoded_spirv.is_empty()) { + error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); + break; + } + + if (!smolv::Decode(smolv_input, smolv_input_size, decoded_spirv.ptrw(), decoded_spirv.size())) { + error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); + break; + } +#else + decoded_spirv.resize(smolv_input_size); + memcpy(decoded_spirv.ptrw(), smolv_input, decoded_spirv.size()); +#endif - for (int i = 0; i < r_shader_desc.stages.size(); i++) { VkShaderModuleCreateInfo shader_module_create_info = {}; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_module_create_info.codeSize = stages_spirv[i].size(); - shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr(); + shader_module_create_info.codeSize = decoded_spirv.size(); + shader_module_create_info.pCode = (const uint32_t *)(decoded_spirv.ptr()); - VkShaderModule vk_module = VK_NULL_HANDLE; - VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module); - if (res) { - error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]); + res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module); + if (res != VK_SUCCESS) { + error_text = vformat("Error (%d) creating module for shader stage %s.", res, String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); break; } VkPipelineShaderStageCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[r_shader_desc.stages[i]]; + create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[shader_refl.stages_vector[i]]; create_info.module = vk_module; create_info.pName = "main"; - shader_info.vk_stages_create_info.push_back(create_info); } // Descriptor sets. - if (error_text.is_empty()) { - DEV_ASSERT((uint32_t)vk_set_bindings.size() == binary_data.set_count); - for (uint32_t i = 0; i < binary_data.set_count; i++) { + for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) { // Empty ones are fine if they were not used according to spec (binding count will be 0). 
VkDescriptorSetLayoutCreateInfo layout_create_info = {}; layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -3958,9 +3703,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec layout_create_info.pBindings = vk_set_bindings[i].ptr(); VkDescriptorSetLayout layout = VK_NULL_HANDLE; - VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout); + res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout); if (res) { - error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i); + error_text = vformat("Error (%d) creating descriptor set layout for set %d.", res, i); break; } @@ -3970,24 +3715,23 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec if (error_text.is_empty()) { // Pipeline layout. - VkPipelineLayoutCreateInfo pipeline_layout_create_info = {}; pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_create_info.setLayoutCount = binary_data.set_count; + pipeline_layout_create_info.setLayoutCount = shader_info.vk_descriptor_set_layouts.size(); pipeline_layout_create_info.pSetLayouts = shader_info.vk_descriptor_set_layouts.ptr(); - if (binary_data.push_constant_size) { + if (shader_refl.push_constant_size > 0) { VkPushConstantRange *push_constant_range = ALLOCA_SINGLE(VkPushConstantRange); *push_constant_range = {}; - push_constant_range->stageFlags = binary_data.vk_push_constant_stages_mask; - push_constant_range->size = binary_data.push_constant_size; + push_constant_range->stageFlags = shader_info.vk_push_constant_stages; + push_constant_range->size = shader_refl.push_constant_size; pipeline_layout_create_info.pushConstantRangeCount = 1; pipeline_layout_create_info.pPushConstantRanges = push_constant_range; } - VkResult err = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout); - if (err) { - error_text = "Error (" + itos(err) + ") creating pipeline layout."; + res = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout); + if (res != VK_SUCCESS) { + error_text = vformat("Error (%d) creating pipeline layout.", res); } } @@ -3996,7 +3740,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec for (uint32_t i = 0; i < shader_info.vk_stages_create_info.size(); i++) { vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE)); } - for (uint32_t i = 0; i < binary_data.set_count; i++) { + for (uint32_t i = 0; i < shader_info.vk_descriptor_set_layouts.size(); i++) { vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)); } @@ -4004,7 +3748,6 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec } // Bookkeep. 
- ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); *shader_info_ptr = shader_info; return ShaderID(shader_info_ptr); @@ -6187,6 +5930,10 @@ const RDD::Capabilities &RenderingDeviceDriverVulkan::get_capabilities() const { return device_capabilities; } +const RenderingShaderContainerFormat &RenderingDeviceDriverVulkan::get_shader_container_format() const { + return shader_container_format; +} + bool RenderingDeviceDriverVulkan::is_composite_alpha_supported(CommandQueueID p_queue) const { if (has_comp_alpha.has((uint64_t)p_queue.id)) { return has_comp_alpha[(uint64_t)p_queue.id]; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 880e876664..180d3dafc6 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -35,6 +35,7 @@ #include "core/templates/hash_map.h" #include "core/templates/paged_allocator.h" #include "drivers/vulkan/rendering_context_driver_vulkan.h" +#include "drivers/vulkan/rendering_shader_container_vulkan.h" #include "servers/rendering/rendering_device_driver.h" #ifdef DEBUG_ENABLED @@ -132,6 +133,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { FragmentDensityMapCapabilities fdm_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; + RenderingShaderContainerFormatVulkan shader_container_format; bool buffer_device_address_support = false; bool pipeline_cache_control_support = false; bool device_fault_support = false; @@ -410,43 +412,6 @@ public: /**** SHADER ****/ /****************/ private: - struct ShaderBinary { - // Version 1: initial. - // Version 2: Added shader name. - // Version 3: Added writable. - // Version 4: 64-bit vertex input mask. - // Version 5: Add 4 bytes padding to align the Data struct after the change in version 4. - static const uint32_t VERSION = 5; - - struct DataBinding { - uint32_t type = 0; - uint32_t binding = 0; - uint32_t stages = 0; - uint32_t length = 0; // Size of arrays (in total elements), or UBOs (in bytes * total elements). 
- uint32_t writable = 0;
- };
-
- struct SpecializationConstant {
- uint32_t type = 0;
- uint32_t constant_id = 0;
- uint32_t int_value = 0;
- uint32_t stage_flags = 0;
- };
-
- struct Data {
- uint64_t vertex_input_mask = 0;
- uint32_t fragment_output_mask = 0;
- uint32_t specialization_constants_count = 0;
- uint32_t is_compute = 0;
- uint32_t compute_local_size[3] = {};
- uint32_t set_count = 0;
- uint32_t push_constant_size = 0;
- uint32_t vk_push_constant_stages_mask = 0;
- uint32_t stage_count = 0;
- uint32_t shader_name_len = 0;
- };
- };
-
 struct ShaderInfo {
 VkShaderStageFlags vk_push_constant_stages = 0;
 TightLocalVector<VkPipelineShaderStageCreateInfo> vk_stages_create_info;
@@ -455,9 +420,7 @@ private:
 };
 public:
- virtual String shader_get_binary_cache_key() override final;
- virtual Vector<uint8_t> shader_compile_binary_from_spirv(VectorView<ShaderStageSPIRVData> p_spirv, const String &p_shader_name) override final;
- virtual ShaderID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
+ virtual ShaderID shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) override final;
 virtual void shader_free(ShaderID p_shader) override final;
 virtual void shader_destroy_modules(ShaderID p_shader) override final;
@@ -713,6 +676,7 @@ public:
 virtual String get_api_version() const override final;
 virtual String get_pipeline_cache_uuid() const override final;
 virtual const Capabilities &get_capabilities() const override final;
+ virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final;
 virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final;
diff --git a/drivers/vulkan/rendering_shader_container_vulkan.cpp b/drivers/vulkan/rendering_shader_container_vulkan.cpp
new file mode 100644
index 0000000000..8a15993e03
--- /dev/null
+++ b/drivers/vulkan/rendering_shader_container_vulkan.cpp
@@ -0,0 +1,105 @@
+/**************************************************************************/
+/* rendering_shader_container_vulkan.cpp */
+/**************************************************************************/
+/* This file is part of: */
+/* REDOT ENGINE */
+/* https://redotengine.org */
+/**************************************************************************/
+/* Copyright (c) 2024-present Redot Engine contributors */
+/* (see REDOT_AUTHORS.md) */
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
+/* */
+/* Permission is hereby granted, free of charge, to any person obtaining */
+/* a copy of this software and associated documentation files (the */
+/* "Software"), to deal in the Software without restriction, including */
+/* without limitation the rights to use, copy, modify, merge, publish, */
+/* distribute, sublicense, and/or sell copies of the Software, and to */
+/* permit persons to whom the Software is furnished to do so, subject to */
+/* the following conditions: */
+/* */
+/* The above copyright notice and this permission notice shall be */
+/* included in all copies or substantial portions of the Software. */
+/* */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+/**************************************************************************/
+
+#include "rendering_shader_container_vulkan.h"
+
+#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV
+#include "thirdparty/misc/smolv.h"
+#endif
+
+// RenderingShaderContainerVulkan
+
+const uint32_t RenderingShaderContainerVulkan::FORMAT_VERSION = 1;
+
+uint32_t RenderingShaderContainerVulkan::_format() const {
+ return 0x43565053;
+}
+
+uint32_t RenderingShaderContainerVulkan::_format_version() const {
+ return FORMAT_VERSION;
+}
+
+bool RenderingShaderContainerVulkan::_set_code_from_spirv(const Vector &p_spirv) {
+ PackedByteArray code_bytes;
+ shaders.resize(p_spirv.size());
+ for (int64_t i = 0; i < p_spirv.size(); i++) {
+#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV
+ // Encode into smolv.
+ smolv::ByteArray smolv_bytes;
+ bool smolv_encoded = smolv::Encode(p_spirv[i].spirv.ptr(), p_spirv[i].spirv.size(), smolv_bytes, smolv::kEncodeFlagStripDebugInfo);
+ ERR_FAIL_COND_V_MSG(!smolv_encoded, false, "Failed to compress SPIR-V into smolv.");
+
+ code_bytes.resize(smolv_bytes.size());
+ memcpy(code_bytes.ptrw(), smolv_bytes.data(), code_bytes.size());
+#else
+ code_bytes.resize(p_spirv[i].spirv.size());
+ memcpy(code_bytes.ptrw(), p_spirv[i].spirv.ptr(), code_bytes.size());
+#endif
+
+ RenderingShaderContainer::Shader &shader = shaders.ptrw()[i];
+#if RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION
+ uint32_t compressed_size = 0;
+ shader.code_decompressed_size = code_bytes.size();
+ shader.code_compressed_bytes.resize(code_bytes.size());
+
+ bool compressed = compress_code(code_bytes.ptr(), code_bytes.size(), shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags);
+ ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress code for SPIR-V #%d.", i));
+
+ shader.code_compressed_bytes.resize(compressed_size);
+#else
+ shader.code_decompressed_size = 0;
+ shader.code_compression_flags = 0;
+ shader.code_compressed_bytes = code_bytes;
+#endif
+ shader.shader_stage = p_spirv[i].shader_stage;
+ }
+
+ return true;
+}
+
+// RenderingShaderContainerFormatVulkan
+
+Ref<RenderingShaderContainer> RenderingShaderContainerFormatVulkan::create_container() const {
+ return memnew(RenderingShaderContainerVulkan);
+}
+
+RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatVulkan::get_shader_language_version() const {
+ return SHADER_LANGUAGE_VULKAN_VERSION_1_1;
+}
+
+RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatVulkan::get_shader_spirv_version() const {
+ return SHADER_SPIRV_VERSION_1_3;
+}
+
+RenderingShaderContainerFormatVulkan::RenderingShaderContainerFormatVulkan() {}
+
+RenderingShaderContainerFormatVulkan::~RenderingShaderContainerFormatVulkan() {}
diff --git a/drivers/vulkan/rendering_shader_container_vulkan.h b/drivers/vulkan/rendering_shader_container_vulkan.h
new file mode 100644
index 0000000000..ac95b72088
--- /dev/null
+++ b/drivers/vulkan/rendering_shader_container_vulkan.h
@@ -0,0 +1,59 @@
+/**************************************************************************/
+/* rendering_shader_container_vulkan.h */
+/**************************************************************************/
+/* This file is part of: */
+/* REDOT ENGINE */
+/*
https://redotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2024-present Redot Engine contributors */ +/* (see REDOT_AUTHORS.md) */ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/rendering_shader_container.h" + +#define RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION 1 +#define RENDERING_SHADER_CONTAINER_VULKAN_SMOLV 1 + +class RenderingShaderContainerVulkan : public RenderingShaderContainer { + GDSOFTCLASS(RenderingShaderContainerVulkan, RenderingShaderContainer); + +public: + static const uint32_t FORMAT_VERSION; + +protected: + virtual uint32_t _format() const override; + virtual uint32_t _format_version() const override; + virtual bool _set_code_from_spirv(const Vector &p_spirv) override; +}; + +class RenderingShaderContainerFormatVulkan : public RenderingShaderContainerFormat { +public: + virtual Ref create_container() const override; + virtual ShaderLanguageVersion get_shader_language_version() const override; + virtual ShaderSpirvVersion get_shader_spirv_version() const override; + RenderingShaderContainerFormatVulkan(); + virtual ~RenderingShaderContainerFormatVulkan(); +}; diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp index 780dccf19e..116270dd46 100644 --- a/drivers/windows/dir_access_windows.cpp +++ b/drivers/windows/dir_access_windows.cpp @@ -364,7 +364,7 @@ String DirAccessWindows::get_filesystem_type() const { &dwFileSystemFlags, szFileSystemName, sizeof(szFileSystemName)) == TRUE) { - return String::utf16((const char16_t *)szFileSystemName); + return String::utf16((const char16_t *)szFileSystemName).to_upper(); } ERR_FAIL_V(""); diff --git a/editor/action_map_editor.cpp b/editor/action_map_editor.cpp index 10f21e4f47..a2f6a76d50 100644 --- a/editor/action_map_editor.cpp +++ b/editor/action_map_editor.cpp @@ -87,7 +87,7 @@ String ActionMapEditor::_check_new_action_name(const String &p_name) { } void ActionMapEditor::_add_edit_text_changed(const String &p_name) { - String error = _check_new_action_name(p_name); + const String error = _check_new_action_name(p_name); add_button->set_tooltip_text(error); 
add_button->set_disabled(!error.is_empty()); } @@ -363,6 +363,15 @@ void ActionMapEditor::drop_data_fw(const Point2 &p_point, const Variant &p_data, void ActionMapEditor::_notification(int p_what) { switch (p_what) { + case NOTIFICATION_TRANSLATION_CHANGED: { + if (!actions_cache.is_empty()) { + update_action_list(); + } + if (!add_button->get_tooltip_text().is_empty()) { + _add_edit_text_changed(add_edit->get_text()); + } + } break; + case NOTIFICATION_THEME_CHANGED: { action_list_search->set_right_icon(get_editor_theme_icon(SNAME("Search"))); add_button->set_button_icon(get_editor_theme_icon(SNAME("Add"))); @@ -457,10 +466,10 @@ void ActionMapEditor::update_action_list(const Vector &p_action_info bool events_eq = Shortcut::is_event_array_equal(action_info.action_initial["events"], action_info.action["events"]); bool action_eq = deadzone_eq && events_eq; action_item->set_meta("__action_initial", action_info.action_initial); - action_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("ReloadSmall")), BUTTON_REVERT_ACTION, action_eq, action_eq ? TTR("Cannot Revert - Action is same as initial") : TTR("Revert Action")); + action_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("ReloadSmall")), BUTTON_REVERT_ACTION, action_eq, action_eq ? TTRC("Cannot Revert - Action is same as initial") : TTRC("Revert Action")); } - action_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Add")), BUTTON_ADD_EVENT, false, TTR("Add Event")); - action_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Remove")), BUTTON_REMOVE_ACTION, !action_info.editable, action_info.editable ? TTR("Remove Action") : TTR("Cannot Remove Action")); + action_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Add")), BUTTON_ADD_EVENT, false, TTRC("Add Event")); + action_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Remove")), BUTTON_REMOVE_ACTION, !action_info.editable, action_info.editable ? 
TTRC("Remove Action") : TTRC("Cannot Remove Action")); action_item->set_custom_bg_color(0, action_tree->get_theme_color(SNAME("prop_subsection"), EditorStringName(Editor))); action_item->set_custom_bg_color(1, action_tree->get_theme_color(SNAME("prop_subsection"), EditorStringName(Editor))); @@ -508,8 +517,8 @@ void ActionMapEditor::update_action_list(const Vector &p_action_info } // Third Column - Buttons - event_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Edit")), BUTTON_EDIT_EVENT, false, TTR("Edit Event"), TTR("Edit Event")); - event_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Remove")), BUTTON_REMOVE_EVENT, false, TTR("Remove Event"), TTR("Remove Event")); + event_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Edit")), BUTTON_EDIT_EVENT, false, TTRC("Edit Event"), TTRC("Edit Event")); + event_item->add_button(2, action_tree->get_editor_theme_icon(SNAME("Remove")), BUTTON_REMOVE_EVENT, false, TTRC("Remove Event"), TTRC("Remove Event")); event_item->set_button_color(2, 0, Color(1, 1, 1, 0.75)); event_item->set_button_color(2, 1, Color(1, 1, 1, 0.75)); } @@ -549,7 +558,7 @@ ActionMapEditor::ActionMapEditor() { action_list_search = memnew(LineEdit); action_list_search->set_h_size_flags(Control::SIZE_EXPAND_FILL); - action_list_search->set_placeholder(TTR("Filter by Name")); + action_list_search->set_placeholder(TTRC("Filter by Name")); action_list_search->set_accessibility_name(TTRC("Filter by Name")); action_list_search->set_clear_button_enabled(true); action_list_search->connect(SceneStringName(text_changed), callable_mp(this, &ActionMapEditor::_search_term_updated)); @@ -565,8 +574,8 @@ ActionMapEditor::ActionMapEditor() { top_hbox->add_child(action_list_search_by_event); clear_all_search = memnew(Button); - clear_all_search->set_text(TTR("Clear All")); - clear_all_search->set_tooltip_text(TTR("Clear all search filters.")); + clear_all_search->set_text(TTRC("Clear All")); + clear_all_search->set_tooltip_text(TTRC("Clear all search filters.")); clear_all_search->connect(SceneStringName(pressed), callable_mp(action_list_search_by_event, &EventListenerLineEdit::clear_event)); clear_all_search->connect(SceneStringName(pressed), callable_mp(action_list_search, &LineEdit::clear)); top_hbox->add_child(clear_all_search); @@ -577,7 +586,7 @@ ActionMapEditor::ActionMapEditor() { add_edit = memnew(LineEdit); add_edit->set_h_size_flags(Control::SIZE_EXPAND_FILL); - add_edit->set_placeholder(TTR("Add New Action")); + add_edit->set_placeholder(TTRC("Add New Action")); add_edit->set_accessibility_name(TTRC("Add New Action")); add_edit->set_clear_button_enabled(true); add_edit->set_keep_editing_on_text_submit(true); @@ -586,7 +595,7 @@ ActionMapEditor::ActionMapEditor() { add_hbox->add_child(add_edit); add_button = memnew(Button); - add_button->set_text(TTR("Add")); + add_button->set_text(TTRC("Add")); add_button->connect(SceneStringName(pressed), callable_mp(this, &ActionMapEditor::_add_action_pressed)); add_hbox->add_child(add_button); // Disable the button and set its tooltip. 
@@ -595,7 +604,7 @@ ActionMapEditor::ActionMapEditor() { add_hbox->add_child(memnew(VSeparator)); show_builtin_actions_checkbutton = memnew(CheckButton); - show_builtin_actions_checkbutton->set_text(TTR("Show Built-in Actions")); + show_builtin_actions_checkbutton->set_text(TTRC("Show Built-in Actions")); show_builtin_actions_checkbutton->connect(SceneStringName(toggled), callable_mp(this, &ActionMapEditor::set_show_builtin_actions)); add_hbox->add_child(show_builtin_actions_checkbutton); @@ -611,9 +620,9 @@ ActionMapEditor::ActionMapEditor() { action_tree->set_columns(3); action_tree->set_hide_root(true); action_tree->set_column_titles_visible(true); - action_tree->set_column_title(0, TTR("Action")); + action_tree->set_column_title(0, TTRC("Action")); action_tree->set_column_clip_content(0, true); - action_tree->set_column_title(1, TTR("Deadzone")); + action_tree->set_column_title(1, TTRC("Deadzone")); action_tree->set_column_expand(1, false); action_tree->set_column_custom_minimum_width(1, 80 * EDSCALE); action_tree->set_column_expand(2, false); diff --git a/editor/add_metadata_dialog.cpp b/editor/add_metadata_dialog.cpp index a2dffdd336..cbaf1e035f 100644 --- a/editor/add_metadata_dialog.cpp +++ b/editor/add_metadata_dialog.cpp @@ -32,7 +32,10 @@ #include "add_metadata_dialog.h" +#include "editor/gui/editor_validation_panel.h" +#include "editor/gui/editor_variant_type_selectors.h" #include "editor/themes/editor_scale.h" +#include "scene/gui/line_edit.h" AddMetadataDialog::AddMetadataDialog() { VBoxContainer *vbc = memnew(VBoxContainer); @@ -48,7 +51,7 @@ AddMetadataDialog::AddMetadataDialog() { hbc->add_child(add_meta_name); hbc->add_child(memnew(Label(TTR("Type:")))); - add_meta_type = memnew(OptionButton); + add_meta_type = memnew(EditorVariantTypeOptionButton); add_meta_type->set_accessibility_name(TTRC("Type:")); hbc->add_child(add_meta_type); @@ -75,19 +78,8 @@ void AddMetadataDialog::_complete_init(const StringName &p_title) { set_title(vformat(TTR("Add Metadata Property for \"%s\""), p_title)); - // Skip if we already completed the initialization. - if (add_meta_type->get_item_count()) { - return; - } - - // Theme icons can be retrieved only the Window has been initialized. - for (int i = 0; i < Variant::VARIANT_MAX; i++) { - if (i == Variant::NIL || i == Variant::RID || i == Variant::CALLABLE || i == Variant::SIGNAL) { - continue; //not editable by inspector. - } - String type = i == Variant::OBJECT ? 
String("Resource") : Variant::get_type_name(Variant::Type(i)); - - add_meta_type->add_icon_item(get_editor_theme_icon(type), type, i); + if (add_meta_type->get_item_count() == 0) { + add_meta_type->populate({ Variant::NIL }, { { Variant::OBJECT, "Resource" } }); } } @@ -105,7 +97,7 @@ StringName AddMetadataDialog::get_meta_name() { Variant AddMetadataDialog::get_meta_defval() { Variant defval; Callable::CallError ce; - Variant::construct(Variant::Type(add_meta_type->get_selected_id()), defval, nullptr, 0, ce); + Variant::construct(add_meta_type->get_selected_type(), defval, nullptr, 0, ce); return defval; } diff --git a/editor/add_metadata_dialog.h b/editor/add_metadata_dialog.h index a439b3f23d..9435f35c9b 100644 --- a/editor/add_metadata_dialog.h +++ b/editor/add_metadata_dialog.h @@ -32,10 +32,11 @@ #pragma once -#include "editor/gui/editor_validation_panel.h" #include "scene/gui/dialogs.h" -#include "scene/gui/line_edit.h" -#include "scene/gui/option_button.h" + +class EditorValidationPanel; +class EditorVariantTypeOptionButton; +class LineEdit; class AddMetadataDialog : public ConfirmationDialog { GDCLASS(AddMetadataDialog, ConfirmationDialog); @@ -54,6 +55,6 @@ private: void _complete_init(const StringName &p_label); LineEdit *add_meta_name = nullptr; - OptionButton *add_meta_type = nullptr; + EditorVariantTypeOptionButton *add_meta_type = nullptr; EditorValidationPanel *validation_panel = nullptr; }; diff --git a/editor/animation_bezier_editor.cpp b/editor/animation_bezier_editor.cpp index 4b3cba039e..3c6f3d44b1 100644 --- a/editor/animation_bezier_editor.cpp +++ b/editor/animation_bezier_editor.cpp @@ -745,6 +745,48 @@ Size2 AnimationBezierTrackEdit::get_minimum_size() const { return Vector2(1, 1); } +Control::CursorShape AnimationBezierTrackEdit::get_cursor_shape(const Point2 &p_pos) const { + // Box selecting or moving a handle + if (box_selecting || Math::abs(moving_handle) == 1) { + return get_default_cursor_shape(); + } + // Hovering a handle + if (!read_only) { + for (const EditPoint &edit_point : edit_points) { + if (edit_point.in_rect.has_point(p_pos) || edit_point.out_rect.has_point(p_pos)) { + return get_default_cursor_shape(); + } + } + } + // Currently box scaling + if (scaling_selection) { + if (scaling_selection_handles == Vector2i(1, 1) || scaling_selection_handles == Vector2i(-1, -1)) { + return CURSOR_FDIAGSIZE; + } else if (scaling_selection_handles == Vector2i(1, -1) || scaling_selection_handles == Vector2i(-1, 1)) { + return CURSOR_BDIAGSIZE; + } else if (abs(scaling_selection_handles.x) == 1) { + return CURSOR_HSIZE; + } else if (abs(scaling_selection_handles.y) == 1) { + return CURSOR_VSIZE; + } + } + // Hovering the scaling box + const Vector2i rel_pos = p_pos - selection_rect.position; + if (selection_handles_rect.has_point(p_pos)) { + if ((rel_pos.x < 0 && rel_pos.y < 0) || (rel_pos.x > selection_rect.size.width && rel_pos.y > selection_rect.size.height)) { + return CURSOR_FDIAGSIZE; + } else if ((rel_pos.x < 0 && rel_pos.y > selection_rect.size.height) || (rel_pos.x > selection_rect.size.width && rel_pos.y < 0)) { + return CURSOR_BDIAGSIZE; + } else if (rel_pos.x < 0 || rel_pos.x > selection_rect.size.width) { + return CURSOR_HSIZE; + } else if (rel_pos.y < 0 || rel_pos.y > selection_rect.size.height) { + return CURSOR_VSIZE; + } + return CURSOR_MOVE; + } + return get_default_cursor_shape(); +} + void AnimationBezierTrackEdit::set_timeline(AnimationTimelineEdit *p_timeline) { timeline = p_timeline; timeline->connect("zoom_changed", callable_mp(this, 
&AnimationBezierTrackEdit::_zoom_changed)); diff --git a/editor/animation_bezier_editor.h b/editor/animation_bezier_editor.h index 140c9e4b2b..69ea9dd9d8 100644 --- a/editor/animation_bezier_editor.h +++ b/editor/animation_bezier_editor.h @@ -216,6 +216,7 @@ public: void set_animation_and_track(const Ref &p_animation, int p_track, bool p_read_only); virtual Size2 get_minimum_size() const override; + virtual CursorShape get_cursor_shape(const Point2 &p_pos) const override; void set_timeline(AnimationTimelineEdit *p_timeline); void set_editor(AnimationTrackEditor *p_editor); diff --git a/editor/animation_track_editor.cpp b/editor/animation_track_editor.cpp index f4c32441dd..c6efa685c0 100644 --- a/editor/animation_track_editor.cpp +++ b/editor/animation_track_editor.cpp @@ -46,6 +46,7 @@ #include "editor/inspector_dock.h" #include "editor/multi_node_edit.h" #include "editor/plugins/animation_player_editor_plugin.h" +#include "editor/plugins/script_editor_plugin.h" #include "editor/themes/editor_scale.h" #include "scene/3d/mesh_instance_3d.h" #include "scene/animation/animation_player.h" @@ -2803,6 +2804,13 @@ Ref AnimationTrackEdit::_get_key_type_icon() const { return type_icons[animation->track_get_type(track)]; } +Control::CursorShape AnimationTrackEdit::get_cursor_shape(const Point2 &p_pos) const { + if (command_or_control_pressed && animation->track_get_type(track) == Animation::TYPE_METHOD && hovering_key_idx != -1) { + return Control::CURSOR_POINTING_HAND; + } + return get_default_cursor_shape(); +} + String AnimationTrackEdit::get_tooltip(const Point2 &p_pos) const { if (check_rect.has_point(p_pos)) { return TTR("Toggle this track on/off."); @@ -3146,6 +3154,11 @@ void AnimationTrackEdit::gui_input(const Ref &p_event) { } } + if (mb->is_command_or_control_pressed() && _lookup_key(hovering_key_idx)) { + accept_event(); + return; + } + if (_try_select_at_ui_pos(pos, mb->is_command_or_control_pressed() || mb->is_shift_pressed(), true)) { accept_event(); } @@ -3166,6 +3179,13 @@ void AnimationTrackEdit::gui_input(const Ref &p_event) { bool selected = _try_select_at_ui_pos(pos, mb->is_command_or_control_pressed() || mb->is_shift_pressed(), false); menu->clear(); + if (animation->track_get_type(track) == Animation::TYPE_METHOD) { + if (hovering_key_idx != -1) { + lookup_key_idx = hovering_key_idx; + menu->add_icon_item(get_editor_theme_icon(SNAME("Help")), vformat("%s (%s)", TTR("Go to Definition"), animation->method_track_get_name(track, lookup_key_idx)), MENU_KEY_LOOKUP); + menu->add_separator(); + } + } menu->add_icon_item(get_editor_theme_icon(SNAME("Key")), TTR("Insert Key..."), MENU_KEY_INSERT); if (selected || editor->is_selection_active()) { menu->add_separator(); @@ -3249,6 +3269,8 @@ void AnimationTrackEdit::gui_input(const Ref &p_event) { if (mm.is_valid()) { const int previous_hovering_key_idx = hovering_key_idx; + command_or_control_pressed = mm->is_command_or_control_pressed(); + // Hovering compressed keyframes for editing is not possible. 
if (!animation->track_is_compressed(track)) { const float scale = timeline->get_zoom_scale(); @@ -3394,6 +3416,45 @@ bool AnimationTrackEdit::_try_select_at_ui_pos(const Point2 &p_pos, bool p_aggre return false; } +bool AnimationTrackEdit::_lookup_key(int p_key_idx) const { + if (p_key_idx < 0 || p_key_idx >= animation->track_get_key_count(track)) { + return false; + } + + if (animation->track_get_type(track) == Animation::TYPE_METHOD) { + Node *target = root->get_node_or_null(animation->track_get_path(track)); + if (target) { + StringName method = animation->method_track_get_name(track, p_key_idx); + // First, check every script in the inheritance chain. + bool found_in_script = false; + Ref