From 985de89ca8caca2b2d24b826b4cea1498234acaa Mon Sep 17 00:00:00 2001 From: Antheas Kapenekakis <5252246+antheas@users.noreply.github.com> Date: Sat, 15 Mar 2025 18:36:49 +0100 Subject: [PATCH] fix(mesa): backport AMD GPU artifact fix, cleanup and attribute patches (#3943) * fix(mesa): backport AMD GPU artifact fix, cleanup and attribute patches * cleanup and reorder * Fix patch credit Signed-off-by: Gilver --------- Signed-off-by: Gilver Co-authored-by: Gilver --- anda/lib/mesa/12633.patch | 26 - ...f9a972324a72dd507e09ac975b969e6c88e0.patch | 34 - ...8799be3afe9a1e7ca9156a5d44ffe0aae681.patch | 38 -- anda/lib/mesa/34005.patch | 46 -- anda/lib/mesa/bazzite.patch | 581 ++++++++++++++++++ anda/lib/mesa/mesa.spec | 24 +- anda/lib/mesa/valve.patch | 143 ----- 7 files changed, 584 insertions(+), 308 deletions(-) delete mode 100644 anda/lib/mesa/12633.patch delete mode 100644 anda/lib/mesa/13a3f9a972324a72dd507e09ac975b969e6c88e0.patch delete mode 100644 anda/lib/mesa/237d8799be3afe9a1e7ca9156a5d44ffe0aae681.patch delete mode 100644 anda/lib/mesa/34005.patch create mode 100644 anda/lib/mesa/bazzite.patch delete mode 100644 anda/lib/mesa/valve.patch diff --git a/anda/lib/mesa/12633.patch b/anda/lib/mesa/12633.patch deleted file mode 100644 index 527e3b6e3d..0000000000 --- a/anda/lib/mesa/12633.patch +++ /dev/null @@ -1,26 +0,0 @@ -commit d0279e717ee740746f0770c5a9870d752108e756 (HEAD -> makepkg) -Author: Maarten Lankhorst -Date: Mon Feb 17 14:55:29 2025 -0800 - - HACK: drm/xe gamescope fix - - Workaround gamescope DRM issues on Xe by allocating ANV_BO_ALLOC_SCANOUT - when using VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT - - Link: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/4292#note_2784316 - Signed-off-by: Matthew Schwartz - -diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c -index 1884932bbc7..cbc1b4aad87 100644 ---- a/src/intel/vulkan/anv_device.c -+++ b/src/intel/vulkan/anv_device.c -@@ -1533,6 +1533,9 @@ VkResult anv_AllocateMemory( - dedicated_info->image != VK_NULL_HANDLE) { - ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); - -+ if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) -+ alloc_flags |= ANV_BO_ALLOC_SCANOUT; -+ - /* Apply implicit sync to be compatible with clients relying on - * implicit fencing. This matches the behavior in iris i915_batch - * submit. An example client is VA-API (iHD), so only dedicated diff --git a/anda/lib/mesa/13a3f9a972324a72dd507e09ac975b969e6c88e0.patch b/anda/lib/mesa/13a3f9a972324a72dd507e09ac975b969e6c88e0.patch deleted file mode 100644 index 43b451f3d1..0000000000 --- a/anda/lib/mesa/13a3f9a972324a72dd507e09ac975b969e6c88e0.patch +++ /dev/null @@ -1,34 +0,0 @@ -From 13a3f9a972324a72dd507e09ac975b969e6c88e0 Mon Sep 17 00:00:00 2001 -From: Hans-Kristian Arntzen -Date: Tue, 25 Feb 2025 12:43:17 +0100 -Subject: [PATCH] radv: Always set 0 dispatch offset for indirect CS. - -Fixes severe glitching in Avowed. - -Signed-off-by: Hans-Kristian Arntzen -Reviewed-by: Samuel Pitoiset -Reviewed-by: Natalie Vock -Cc: mesa-stable -Part-of: ---- - src/amd/vulkan/radv_cmd_buffer.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c -index d8c0c6810524b..d85c0659cb04e 100644 ---- a/src/amd/vulkan/radv_cmd_buffer.c -+++ b/src/amd/vulkan/radv_cmd_buffer.c -@@ -11722,6 +11722,10 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv - dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1); - } - -+ /* Indirect CS does not support offsets in the API. Must program this in case there have been -+ * preceding 1D RT dispatch or vkCmdDispatchBase. */ -+ dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); -+ - if (grid_size_offset) { - if (device->load_grid_size_from_user_sgpr) { - assert(pdev->info.gfx_level >= GFX10_3); --- -GitLab - diff --git a/anda/lib/mesa/237d8799be3afe9a1e7ca9156a5d44ffe0aae681.patch b/anda/lib/mesa/237d8799be3afe9a1e7ca9156a5d44ffe0aae681.patch deleted file mode 100644 index 47151e7f03..0000000000 --- a/anda/lib/mesa/237d8799be3afe9a1e7ca9156a5d44ffe0aae681.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 237d8799be3afe9a1e7ca9156a5d44ffe0aae681 Mon Sep 17 00:00:00 2001 -From: Natalie Vock -Date: Fri, 28 Feb 2025 14:21:57 +0100 -Subject: [PATCH] radv/rt: Limit monolithic pipelines to 50 stages - -Beyond that, monolithic pipelines just bloat to incredible sizes, -destroying compile times for questionable, if any, runtime perf benefit. - -Indiana Jones: The Great Circle has more than 100 stages and takes -several minutes to compile its RT pipeline on Deck when using monolithic -compilation, and yet separate shaders still end up faster (probably -because instruction cache coherency in traversal is better). - -Part-of: ---- - src/amd/vulkan/radv_pipeline_rt.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c -index 0658840966c30..bcda4aae5528d 100644 ---- a/src/amd/vulkan/radv_pipeline_rt.c -+++ b/src/amd/vulkan/radv_pipeline_rt.c -@@ -602,7 +602,11 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca - - bool library = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR; - -- bool monolithic = !library; -+ /* Beyond 50 shader stages, inlining everything bloats the shader a ton, increasing compile times and -+ * potentially even reducing runtime performance because of instruction cache coherency issues in the -+ * traversal loop. -+ */ -+ bool monolithic = !library && pipeline->stage_count < 50; - for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { - if (rt_stages[i].shader || rt_stages[i].nir) - continue; --- -GitLab - diff --git a/anda/lib/mesa/34005.patch b/anda/lib/mesa/34005.patch deleted file mode 100644 index cd4a36c58e..0000000000 --- a/anda/lib/mesa/34005.patch +++ /dev/null @@ -1,46 +0,0 @@ -From c6270978411609f52afb8c6a2219fcb94f9013bb Mon Sep 17 00:00:00 2001 -From: Samuel Pitoiset -Date: Tue, 11 Mar 2025 15:29:37 +0100 -Subject: [PATCH] radv/amdgpu: fix device deduplication - -To correctly deduplicate device inside the winsys, it should use the -fd or amdgpu_device_handle. Using the allocated ac_drm_device as key -is obviously broken. - -Not deduplicating devices breaks memory budget and a bunch of games -were broken. - -Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12686 -Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12775 -Fixes: a565f2994fe ("amd: move all uses of libdrm_amdgpu to ac_linux_drm") -Signed-off-by: Samuel Pitoiset -Part-of: ---- - src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c -index be8df8708c813..8b57abeb0b1ca 100644 ---- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c -+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c -@@ -234,7 +234,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, - goto fail; - } - -- struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev); -+ struct hash_entry *entry = _mesa_hash_table_search(winsyses, (void *)ac_drm_device_get_cookie(dev)); - if (entry) { - ws = (struct radv_amdgpu_winsys *)entry->data; - ++ws->refcount; -@@ -325,7 +325,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, - radv_amdgpu_bo_init_functions(ws); - radv_amdgpu_cs_init_functions(ws); - -- _mesa_hash_table_insert(winsyses, dev, ws); -+ _mesa_hash_table_insert(winsyses, (void *)ac_drm_device_get_cookie(dev), ws); - simple_mtx_unlock(&winsys_creation_mutex); - - return &ws->base; --- -GitLab - diff --git a/anda/lib/mesa/bazzite.patch b/anda/lib/mesa/bazzite.patch new file mode 100644 index 0000000000..1ad3cd5ac5 --- /dev/null +++ b/anda/lib/mesa/bazzite.patch @@ -0,0 +1,581 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Antheas Kapenekakis +Date: Sat, 15 Mar 2025 16:38:53 +0100 +Subject: [NA] Developer files, readme, etc + +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Antheas Kapenekakis +Date: Sat, 15 Mar 2025 16:39:08 +0100 +Subject: [BEGIN] SteamOS Changes + +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Bas Nieuwenhuizen +Date: Fri, 14 Jan 2022 15:58:45 +0100 +Subject: STEAMOS: radv: min image count override for FH5 + +Otherwise in combination with the vblank time reservation in +gamescope the game could get stuck in low power states. +--- + src/util/00-radv-defaults.conf | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/src/util/00-radv-defaults.conf b/src/util/00-radv-defaults.conf +index d2dbe4d5e11..1851504036a 100644 +--- a/src/util/00-radv-defaults.conf ++++ b/src/util/00-radv-defaults.conf +@@ -220,5 +220,9 @@ Application bugs worked around in this file: + + ++ ++ ++ + + +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Samuel Pitoiset +Date: Thu, 22 Feb 2024 22:32:45 +0100 +Subject: STEAMOS: Dynamic swapchain override for gamescope limiter for DRI3 + only + +The original patch (from Bas) contained WSI VK support too but it's +been removed because the Gamescope WSI layer already handles that. + +Signed-off-by: Samuel Pitoiset +--- + .../frontends/dri/loader_dri3_helper.c | 42 ++++++++++++++++++- + .../frontends/dri/loader_dri3_helper.h | 1 + + 2 files changed, 41 insertions(+), 2 deletions(-) + +diff --git a/src/gallium/frontends/dri/loader_dri3_helper.c b/src/gallium/frontends/dri/loader_dri3_helper.c +index 9e4ca3f5707..7863623f8de 100644 +--- a/src/gallium/frontends/dri/loader_dri3_helper.c ++++ b/src/gallium/frontends/dri/loader_dri3_helper.c +@@ -297,6 +297,30 @@ dri3_update_max_num_back(struct loader_dri3_drawable *draw) + } + } + ++static unsigned ++gamescope_swapchain_override() ++{ ++ const char *path = getenv("GAMESCOPE_LIMITER_FILE"); ++ if (!path) ++ return 0; ++ ++ static simple_mtx_t mtx = SIMPLE_MTX_INITIALIZER; ++ static int fd = -1; ++ ++ simple_mtx_lock(&mtx); ++ if (fd < 0) { ++ fd = open(path, O_RDONLY); ++ } ++ simple_mtx_unlock(&mtx); ++ ++ if (fd < 0) ++ return 0; ++ ++ uint32_t override_value = 0; ++ pread(fd, &override_value, sizeof(override_value), 0); ++ return override_value; ++} ++ + void + loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) + { +@@ -311,10 +335,12 @@ loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) + * PS. changing from value A to B and A < B won't cause swap out of order but + * may still gets wrong target_msc value at the beginning. + */ +- if (draw->swap_interval != interval) ++ if (draw->orig_swap_interval != interval) + loader_dri3_swapbuffer_barrier(draw); + +- draw->swap_interval = interval; ++ draw->orig_swap_interval = interval; ++ if (gamescope_swapchain_override() != 1) ++ draw->swap_interval = interval; + } + + static void +@@ -443,6 +469,12 @@ loader_dri3_drawable_init(xcb_connection_t *conn, + + draw->swap_interval = dri_get_initial_swap_interval(draw->dri_screen_render_gpu); + ++ draw->orig_swap_interval = draw->swap_interval; ++ ++ unsigned gamescope_override = gamescope_swapchain_override(); ++ if (gamescope_override == 1) ++ draw->swap_interval = 1; ++ + dri3_update_max_num_back(draw); + + /* Create a new drawable */ +@@ -1087,6 +1119,12 @@ loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw, + if (draw->type == LOADER_DRI3_DRAWABLE_WINDOW) { + dri3_fence_reset(draw->conn, back); + ++ unsigned gamescope_override = gamescope_swapchain_override(); ++ if (gamescope_override == 1) ++ draw->swap_interval = 1; ++ else ++ draw->swap_interval = draw->orig_swap_interval; ++ + /* Compute when we want the frame shown by taking the last known + * successful MSC and adding in a swap interval for each outstanding swap + * request. target_msc=divisor=remainder=0 means "Use glXSwapBuffers() +diff --git a/src/gallium/frontends/dri/loader_dri3_helper.h b/src/gallium/frontends/dri/loader_dri3_helper.h +index 9061e9755e2..6cc64be298a 100644 +--- a/src/gallium/frontends/dri/loader_dri3_helper.h ++++ b/src/gallium/frontends/dri/loader_dri3_helper.h +@@ -170,6 +170,7 @@ struct loader_dri3_drawable { + bool block_on_depleted_buffers; + bool queries_buffer_age; + int swap_interval; ++ int orig_swap_interval; + + const struct loader_dri3_vtable *vtable; + +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Samuel Pitoiset +Date: Mon, 24 Feb 2025 17:48:21 +0100 +Subject: radv: stop computing the UUID using the physical device cache key + +Otherwise, the UUID changes for games that have shader-based drirc +workarounds and this breaks precompiled shaders on SteamDeck. + +Instead, use this pdev cache key to compute the logical device hash +which is common to all pipelines. + +Signed-off-by: Samuel Pitoiset +--- + src/amd/vulkan/radv_device.c | 6 +++++- + src/amd/vulkan/radv_physical_device.c | 1 - + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c +index 2de839e5d6d..da732ae503e 100644 +--- a/src/amd/vulkan/radv_device.c ++++ b/src/amd/vulkan/radv_device.c +@@ -858,6 +858,7 @@ radv_device_init_cache_key(struct radv_device *device) + const struct radv_physical_device *pdev = radv_device_physical(device); + const struct radv_instance *instance = radv_physical_device_instance(pdev); + struct radv_device_cache_key *key = &device->cache_key; ++ struct mesa_blake3 ctx; + + key->keep_shader_info = device->keep_shader_info; + key->trap_excp_flags = device->trap_handler_shader && instance->trap_excp_flags; +@@ -879,7 +880,10 @@ radv_device_init_cache_key(struct radv_device *device) + key->primitives_generated_query = true; + } + +- _mesa_blake3_compute(key, sizeof(*key), device->cache_hash); ++ _mesa_blake3_init(&ctx); ++ _mesa_blake3_update(&ctx, &pdev->cache_key, sizeof(pdev->cache_key)); ++ _mesa_blake3_update(&ctx, &device->cache_key, sizeof(device->cache_key)); ++ _mesa_blake3_final(&ctx, device->cache_hash); + } + + static void +diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c +index f24203fcccc..b1a742d48ef 100644 +--- a/src/amd/vulkan/radv_physical_device.c ++++ b/src/amd/vulkan/radv_physical_device.c +@@ -264,7 +264,6 @@ radv_device_get_cache_uuid(struct radv_physical_device *pdev, void *uuid) + return -1; + #endif + +- _mesa_sha1_update(&ctx, &pdev->cache_key, sizeof(pdev->cache_key)); + _mesa_sha1_final(&ctx, sha1); + + memcpy(uuid, sha1, VK_UUID_SIZE); +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Antheas Kapenekakis +Date: Sat, 15 Mar 2025 16:39:25 +0100 +Subject: [BEGIN] SteamOS Backports + +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Natalie Vock +Date: Fri, 28 Feb 2025 14:21:57 +0100 +Subject: radv/rt: Limit monolithic pipelines to 50 stages + +Beyond that, monolithic pipelines just bloat to incredible sizes, +destroying compile times for questionable, if any, runtime perf benefit. + +Indiana Jones: The Great Circle has more than 100 stages and takes +several minutes to compile its RT pipeline on Deck when using monolithic +compilation, and yet separate shaders still end up faster (probably +because instruction cache coherency in traversal is better). + +Part-of: +--- + src/amd/vulkan/radv_pipeline_rt.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/src/amd/vulkan/radv_pipeline_rt.c b/src/amd/vulkan/radv_pipeline_rt.c +index 5a23dc99cc4..1421688d580 100644 +--- a/src/amd/vulkan/radv_pipeline_rt.c ++++ b/src/amd/vulkan/radv_pipeline_rt.c +@@ -600,7 +600,11 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca + + bool library = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR; + +- bool monolithic = !library; ++ /* Beyond 50 shader stages, inlining everything bloats the shader a ton, increasing compile times and ++ * potentially even reducing runtime performance because of instruction cache coherency issues in the ++ * traversal loop. ++ */ ++ bool monolithic = !library && pipeline->stage_count < 50; + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + if (rt_stages[i].shader || rt_stages[i].nir) + continue; +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Antheas Kapenekakis +Date: Sat, 15 Mar 2025 16:39:33 +0100 +Subject: [BEGIN] Our Mesa backports + +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Rhys Perry +Date: Tue, 25 Feb 2025 18:07:30 +0000 +Subject: aco: insert dependency waits in certain situations + +This seems to fix some artifacts, but we're not sure why, so it might not +be a correct or optimal solution. + +fossil-db (navi31): +Totals from 28424 (35.81% of 79377) affected shaders: +Instrs: 30112910 -> 30348977 (+0.78%); split: -0.00%, +0.78% +CodeSize: 159542980 -> 160485336 (+0.59%); split: -0.00%, +0.59% +Latency: 221438396 -> 221500856 (+0.03%); split: -0.00%, +0.03% +InvThroughput: 38154231 -> 38159984 (+0.02%); split: -0.00%, +0.02% + +Signed-off-by: Rhys Perry +Reviewed-by: Georg Lehmann +Backport-to: 25.0 +Part-of: +--- + src/amd/compiler/aco_insert_NOPs.cpp | 101 +++++++++++++++++++++++---- + 1 file changed, 87 insertions(+), 14 deletions(-) + +diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp +index de062be2c74..1005f82812c 100644 +--- a/src/amd/compiler/aco_insert_NOPs.cpp ++++ b/src/amd/compiler/aco_insert_NOPs.cpp +@@ -259,6 +259,9 @@ struct NOP_ctx_gfx11 { + std::bitset<128> sgpr_read_by_valu_as_lanemask; + std::bitset<128> sgpr_read_by_valu_as_lanemask_then_wr_by_salu; + ++ std::bitset<128> sgpr_read_by_valu_as_lanemask2; ++ std::bitset<128> sgpr_read_by_valu_as_lanemask_then_wr_by_valu; ++ + /* WMMAHazards */ + std::bitset<256> vgpr_written_by_wmma; + +@@ -278,8 +281,11 @@ struct NOP_ctx_gfx11 { + valu_since_wr_by_trans.join_min(other.valu_since_wr_by_trans); + trans_since_wr_by_trans.join_min(other.trans_since_wr_by_trans); + sgpr_read_by_valu_as_lanemask |= other.sgpr_read_by_valu_as_lanemask; ++ sgpr_read_by_valu_as_lanemask2 |= other.sgpr_read_by_valu_as_lanemask2; + sgpr_read_by_valu_as_lanemask_then_wr_by_salu |= + other.sgpr_read_by_valu_as_lanemask_then_wr_by_salu; ++ sgpr_read_by_valu_as_lanemask_then_wr_by_valu |= ++ other.sgpr_read_by_valu_as_lanemask_then_wr_by_valu; + vgpr_written_by_wmma |= other.vgpr_written_by_wmma; + sgpr_read_by_valu |= other.sgpr_read_by_valu; + sgpr_read_by_valu_then_wr_by_valu |= other.sgpr_read_by_valu_then_wr_by_valu; +@@ -297,8 +303,11 @@ struct NOP_ctx_gfx11 { + valu_since_wr_by_trans == other.valu_since_wr_by_trans && + trans_since_wr_by_trans == other.trans_since_wr_by_trans && + sgpr_read_by_valu_as_lanemask == other.sgpr_read_by_valu_as_lanemask && ++ sgpr_read_by_valu_as_lanemask2 == other.sgpr_read_by_valu_as_lanemask2 && + sgpr_read_by_valu_as_lanemask_then_wr_by_salu == + other.sgpr_read_by_valu_as_lanemask_then_wr_by_salu && ++ sgpr_read_by_valu_as_lanemask_then_wr_by_valu == ++ other.sgpr_read_by_valu_as_lanemask_then_wr_by_valu && + vgpr_written_by_wmma == other.vgpr_written_by_wmma && + sgpr_read_by_valu == other.sgpr_read_by_valu && + sgpr_read_by_valu_then_wr_by_salu == other.sgpr_read_by_valu_then_wr_by_salu; +@@ -1377,6 +1386,30 @@ handle_valu_partial_forwarding_hazard(State& state, aco_ptr& instr) + return global_state.hazard_found; + } + ++static bool ++instr_reads_lanemask(Instruction* instr, Operand* op) ++{ ++ if (!instr->isVALU()) ++ return false; ++ if (instr->isVOPD()) { ++ *op = Operand(vcc, s1); ++ return instr->opcode == aco_opcode::v_dual_cndmask_b32 || ++ instr->vopd().opy == aco_opcode::v_dual_cndmask_b32; ++ } ++ switch (instr->opcode) { ++ case aco_opcode::v_addc_co_u32: ++ case aco_opcode::v_subb_co_u32: ++ case aco_opcode::v_subbrev_co_u32: ++ case aco_opcode::v_cndmask_b16: ++ case aco_opcode::v_cndmask_b32: ++ case aco_opcode::v_div_fmas_f32: ++ case aco_opcode::v_div_fmas_f64: ++ *op = instr->operands.back(); ++ return !instr->operands.back().isConstant(); ++ default: return false; ++ } ++} ++ + void + handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& instr, + std::vector>& new_instructions) +@@ -1473,14 +1506,47 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& + sa_sdst = 0; + } + ++ /* VALU reading a SGPR as a lane mask and later written as a lane mask shouldn't be read again ++ * as a lane mask without a wait. ++ * ++ * TODO: this fixes #12623 and #11480, but needs further investigation as to why. ++ */ ++ Operand lanemask_op; ++ if (instr_reads_lanemask(instr.get(), &lanemask_op)) { ++ unsigned reg = lanemask_op.physReg().reg(); ++ if (ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[reg] || ++ (state.program->wave_size == 64 && ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[reg + 1])) { ++ bool is_vcc = reg == vcc || reg == vcc_hi; ++ bld.sopp(aco_opcode::s_waitcnt_depctr, is_vcc ? 0xfffd : 0xf1ff); ++ if (is_vcc) ++ wait.va_vcc = 0; ++ else ++ wait.va_sdst = 0; ++ } ++ } ++ + if (va_vdst == 0) { + ctx.valu_since_wr_by_trans.reset(); + ctx.trans_since_wr_by_trans.reset(); ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu.reset(); + } + + if (sa_sdst == 0) + ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_salu.reset(); + ++ if (wait.va_sdst == 0) { ++ std::bitset<128> old = ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu; ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu.reset(); ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[vcc] = old[vcc]; ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[vcc_hi] = old[vcc_hi]; ++ } ++ ++ if (wait.va_vcc == 0) { ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[vcc] = false; ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[vcc_hi] = false; ++ } ++ + if (state.program->wave_size == 64 && instr->isSALU() && + check_written_regs(instr, ctx.sgpr_read_by_valu_as_lanemask)) { + unsigned reg = instr->definitions[0].physReg().reg(); +@@ -1511,21 +1577,28 @@ handle_instruction_gfx11(State& state, NOP_ctx_gfx11& ctx, aco_ptr& + if (!op.isConstant() && op.physReg().reg() < 126) + ctx.sgpr_read_by_valu_as_lanemask.reset(); + } +- switch (instr->opcode) { +- case aco_opcode::v_addc_co_u32: +- case aco_opcode::v_subb_co_u32: +- case aco_opcode::v_subbrev_co_u32: +- case aco_opcode::v_cndmask_b16: +- case aco_opcode::v_cndmask_b32: +- case aco_opcode::v_div_fmas_f32: +- case aco_opcode::v_div_fmas_f64: +- if (instr->operands.back().physReg() != exec) { +- ctx.sgpr_read_by_valu_as_lanemask.set(instr->operands.back().physReg().reg()); +- ctx.sgpr_read_by_valu_as_lanemask.set(instr->operands.back().physReg().reg() + 1); +- } +- break; +- default: break; ++ } ++ ++ if (instr_reads_lanemask(instr.get(), &lanemask_op)) { ++ unsigned reg = lanemask_op.physReg().reg(); ++ if (state.program->wave_size == 64 && reg != exec) { ++ ctx.sgpr_read_by_valu_as_lanemask.set(reg); ++ ctx.sgpr_read_by_valu_as_lanemask.set(reg + 1); + } ++ ctx.sgpr_read_by_valu_as_lanemask2.set(reg); ++ if (state.program->wave_size == 64) ++ ctx.sgpr_read_by_valu_as_lanemask2.set(reg + 1); ++ } ++ ++ if (instr->opcode != aco_opcode::v_readlane_b32_e64 && ++ instr->opcode != aco_opcode::v_readfirstlane_b32 && ++ !instr->definitions.empty() && ++ instr->definitions.back().getTemp().type() == RegType::sgpr) { ++ unsigned reg = instr->definitions.back().physReg().reg(); ++ if (ctx.sgpr_read_by_valu_as_lanemask2[reg]) ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[reg] = true; ++ if (state.program->wave_size == 64 && ctx.sgpr_read_by_valu_as_lanemask2[reg + 1]) ++ ctx.sgpr_read_by_valu_as_lanemask_then_wr_by_valu[reg + 1] = true; + } + } + } else { +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: "Ivan A. Melnikov" +Date: Fri, 7 Mar 2025 19:29:31 +0400 +Subject: gallium/radeon: Make sure radeonsi PCI IDs are also included + +When importing libdrm_radeon code [1][2] it was somehow missed +that what libdrm has in one r600_pci_ids.h, Mesa has split +into r600_pci_ids.h and radeonsi_pci_ids.h. So, devices +with ids from radeonsi_pci_ids.h were not considered valid for +radeon_surface_manager_new. + +This commit changes that, thus fixing radeonsi for these +devices. + +[1] commit 1299f5c50a490fadeb60b61677596f13399ee136 +[2] commit 3aa7497cc0bb52c8099fb07b27f9aee5e18e58ca + +Fixes: 1299f5c50a490fadeb60b61677596f13399ee136 +Signed-off-by: Ivan A. Melnikov +Part-of: +--- + src/gallium/winsys/radeon/drm/radeon_surface.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/gallium/winsys/radeon/drm/radeon_surface.c b/src/gallium/winsys/radeon/drm/radeon_surface.c +index 8a3302df684..3c469ad0c6e 100644 +--- a/src/gallium/winsys/radeon/drm/radeon_surface.c ++++ b/src/gallium/winsys/radeon/drm/radeon_surface.c +@@ -132,6 +132,9 @@ static int radeon_get_family(struct radeon_surface_manager *surf_man) + switch (surf_man->device_id) { + #define CHIPSET(pci_id, name, fam) case pci_id: surf_man->family = CHIP_##fam; break; + #include "pci_ids/r600_pci_ids.h" ++#undef CHIPSET ++#define CHIPSET(pci_id, fam) case pci_id: surf_man->family = CHIP_##fam; break; ++#include "pci_ids/radeonsi_pci_ids.h" + #undef CHIPSET + default: + return -EINVAL; +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Samuel Pitoiset +Date: Tue, 11 Mar 2025 15:29:37 +0100 +Subject: radv/amdgpu: fix device deduplication + +To correctly deduplicate device inside the winsys, it should use the +fd or amdgpu_device_handle. Using the allocated ac_drm_device as key +is obviously broken. + +Not deduplicating devices breaks memory budget and a bunch of games +were broken. + +Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12686 +Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12775 +Fixes: a565f2994fe ("amd: move all uses of libdrm_amdgpu to ac_linux_drm") +Signed-off-by: Samuel Pitoiset +Part-of: +--- + src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +index be8df8708c8..8b57abeb0b1 100644 +--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c ++++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c +@@ -234,7 +234,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, + goto fail; + } + +- struct hash_entry *entry = _mesa_hash_table_search(winsyses, dev); ++ struct hash_entry *entry = _mesa_hash_table_search(winsyses, (void *)ac_drm_device_get_cookie(dev)); + if (entry) { + ws = (struct radv_amdgpu_winsys *)entry->data; + ++ws->refcount; +@@ -325,7 +325,7 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags, + radv_amdgpu_bo_init_functions(ws); + radv_amdgpu_cs_init_functions(ws); + +- _mesa_hash_table_insert(winsyses, dev, ws); ++ _mesa_hash_table_insert(winsyses, (void *)ac_drm_device_get_cookie(dev), ws); + simple_mtx_unlock(&winsys_creation_mutex); + + return &ws->base; +-- +2.48.1 + + +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Maarten Lankhorst +Date: Mon, 17 Feb 2025 14:55:29 -0800 +Subject: anv: Mark images with format modifiers set as scanout. + +We currently use the presence of struct WSI_IMAGE_CREATE_INFO_MESA.scanout to mark the BO as scanout, +but this only handles the linear case, and fails when drm format modifiers are used. + +Also handle the case of exportable BO with tiling set to VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT. +This fixes the gamescope handling of using vulkan allocated images for scanout. + +Link: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12633 +Signed-off-by: Maarten Lankhorst +Signed-off-by: Matthew Schwartz + +Normalspeak: fixes battlemage iGPUs in gamescope +--- + src/intel/vulkan/anv_device.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c +index 1884932bbc7..cbc1b4aad87 100644 +--- a/src/intel/vulkan/anv_device.c ++++ b/src/intel/vulkan/anv_device.c +@@ -1533,6 +1533,9 @@ VkResult anv_AllocateMemory( + dedicated_info->image != VK_NULL_HANDLE) { + ANV_FROM_HANDLE(anv_image, image, dedicated_info->image); + ++ if (image->vk.tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) ++ alloc_flags |= ANV_BO_ALLOC_SCANOUT; ++ + /* Apply implicit sync to be compatible with clients relying on + * implicit fencing. This matches the behavior in iris i915_batch + * submit. An example client is VA-API (iHD), so only dedicated +-- +2.48.1 + diff --git a/anda/lib/mesa/mesa.spec b/anda/lib/mesa/mesa.spec index bbcffcc102..10118f22ff 100644 --- a/anda/lib/mesa/mesa.spec +++ b/anda/lib/mesa/mesa.spec @@ -76,7 +76,7 @@ Summary: Mesa graphics libraries # disabled by default, and has to be enabled manually. See `terra/release/terra-mesa.repo` for details. Epoch: 1 Version: 25.0.1 -Release: 5%?dist +Release: 6%?dist License: MIT AND BSD-3-Clause AND SGI-B-2.0 URL: http://www.mesa3d.org @@ -88,26 +88,8 @@ Source0: https://archive.mesa3d.org/%{srcname}-%{version}.tar.xz # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD. Source1: Mesa-MLAA-License-Clarification-Email.txt -#Patch10: https://src.fedoraproject.org/rpms/mesa/raw/e89544b7a4d811a64ca23b402add29524cc6f704/f/gnome-shell-glthread-disable.patch -#Patch11: https://src.fedoraproject.org/rpms/mesa/raw/e89544b7a4d811a64ca23b402add29524cc6f704/f/0001-llvmpipe-Init-eglQueryDmaBufModifiersEXT-num_modifie.patch -#Patch12: https://src.fedoraproject.org/rpms/mesa/raw/e89544b7a4d811a64ca23b402add29524cc6f704/f/0001-Revert-ac-radeonsi-remove-has_syncobj-has_fence_to_h.patch - -# https://gitlab.com/evlaV/mesa/ -Patch20: valve.patch - -# Fix issues with Intel Battlemage under Valve's gamescope in DRM mode -# https://gitlab.freedesktop.org/mesa/mesa/-/issues/12633 -Patch21: 12633.patch - -# radv/amdgpu: fix device deduplication -# https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34005/diffs -Patch22: 34005.patch - -Patch30: 237d8799be3afe9a1e7ca9156a5d44ffe0aae681.patch -Patch31: 13a3f9a972324a72dd507e09ac975b969e6c88e0.patch - -# s390x: fix build -#Patch100: https://src.fedoraproject.org/rpms/mesa/raw/e89544b7a4d811a64ca23b402add29524cc6f704/f/fix-egl-on-s390x.patch +# https://github.com/bazzite-org/mesa +Patch20: bazzite.patch BuildRequires: meson >= 1.3.0 BuildRequires: gcc diff --git a/anda/lib/mesa/valve.patch b/anda/lib/mesa/valve.patch deleted file mode 100644 index d4eb3da790..0000000000 --- a/anda/lib/mesa/valve.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 04afaf13b208f5c58c0b057f3dfc2dfa5c19a334 Mon Sep 17 00:00:00 2001 -From: Bas Nieuwenhuizen -Date: Fri, 14 Jan 2022 15:58:45 +0100 -Subject: [PATCH 5/8] STEAMOS: radv: min image count override for FH5 - -Otherwise in combination with the vblank time reservation in -gamescope the game could get stuck in low power states. ---- - src/util/00-radv-defaults.conf | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/src/util/00-radv-defaults.conf b/src/util/00-radv-defaults.conf -index 1cbb2e087c9..43488ada6bc 100644 ---- a/src/util/00-radv-defaults.conf -+++ b/src/util/00-radv-defaults.conf -@@ -207,6 +207,11 @@ Application bugs worked around in this file: - - -+ -+ -+ -+ - - - -2.42.0 - - -From b1c0d3de07bf958317f386585ce541b1c336e929 Mon Sep 17 00:00:00 2001 -From: Bas Nieuwenhuizen -Date: Mon, 21 Feb 2022 18:43:54 +0100 -Subject: [PATCH 6/8] STEAMOS: Dynamic swapchain override for gamescope limiter - ---- - src/gallium/frontends/dri/loader_dri3_helper.c | 42 +++++++++++++++++++++++++++++++-- - src/gallium/frontends/dri/loader_dri3_helper.h | 1 + - src/loader/meson.build | 2 +- - 4 files changed, 80 insertions(+), 3 deletions(-) - -diff --git a/src/gallium/frontends/dri/loader_dri3_helper.c b/src/gallium/frontends/dri/loader_dri3_helper.c -index 2631a9e2fd5..dbf6db349c6 100644 ---- a/src/gallium/frontends/dri/loader_dri3_helper.c -+++ b/src/gallium/frontends/dri/loader_dri3_helper.c -@@ -276,6 +276,30 @@ dri3_update_max_num_back(struct loader_dri3_drawable *draw) - } - } - -+static unsigned -+gamescope_swapchain_override() -+{ -+ const char *path = getenv("GAMESCOPE_LIMITER_FILE"); -+ if (!path) -+ return 0; -+ -+ static simple_mtx_t mtx = SIMPLE_MTX_INITIALIZER; -+ static int fd = -1; -+ -+ simple_mtx_lock(&mtx); -+ if (fd < 0) { -+ fd = open(path, O_RDONLY); -+ } -+ simple_mtx_unlock(&mtx); -+ -+ if (fd < 0) -+ return 0; -+ -+ uint32_t override_value = 0; -+ pread(fd, &override_value, sizeof(override_value), 0); -+ return override_value; -+} -+ - void - loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) - { -@@ -290,10 +314,12 @@ loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval) - * PS. changing from value A to B and A < B won't cause swap out of order but - * may still gets wrong target_msc value at the beginning. - */ -- if (draw->swap_interval != interval) -+ if (draw->orig_swap_interval != interval) - loader_dri3_swapbuffer_barrier(draw); - -- draw->swap_interval = interval; -+ draw->orig_swap_interval = interval; -+ if (gamescope_swapchain_override() != 1) -+ draw->swap_interval = interval; - } - - static void -@@ -422,6 +448,12 @@ loader_dri3_drawable_init(xcb_connection_t *conn, - - draw->swap_interval = dri_get_initial_swap_interval(draw->dri_screen_render_gpu); - -+ draw->orig_swap_interval = draw->swap_interval; -+ -+ unsigned gamescope_override = gamescope_swapchain_override(); -+ if (gamescope_override == 1) -+ draw->swap_interval = 1; -+ - dri3_update_max_num_back(draw); - - /* Create a new drawable */ -@@ -1066,6 +1098,12 @@ loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw, - if (draw->type == LOADER_DRI3_DRAWABLE_WINDOW) { - dri3_fence_reset(draw->conn, back); - -+ unsigned gamescope_override = gamescope_swapchain_override(); -+ if (gamescope_override == 1) -+ draw->swap_interval = 1; -+ else -+ draw->swap_interval = draw->orig_swap_interval; -+ - /* Compute when we want the frame shown by taking the last known - * successful MSC and adding in a swap interval for each outstanding swap - * request. target_msc=divisor=remainder=0 means "Use glXSwapBuffers() -diff --git a/src/gallium/frontends/dri/loader_dri3_helper.h b/src/gallium/frontends/dri/loader_dri3_helper.h -index cc2362dd599..fe73b3f329c 100644 ---- a/src/gallium/frontends/dri/loader_dri3_helper.h -+++ b/src/gallium/frontends/dri/loader_dri3_helper.h -@@ -170,6 +170,7 @@ struct loader_dri3_drawable { - bool block_on_depleted_buffers; - bool queries_buffer_age; - int swap_interval; -+ int orig_swap_interval; - - const struct loader_dri3_vtable *vtable; - -diff --git a/src/gallium/frontends/dri/meson.build b/src/gallium/frontends/dri/meson.build -index a98c8c0..0d4f816 100644 ---- a/src/gallium/frontends/dri/meson.build -+++ b/src/gallium/frontends/dri/meson.build -@@ -23,7 +23,7 @@ if with_platform_x11 - deps_for_libdri += dep_xcb - if with_dri_platform == 'drm' - deps_for_libdri += [dep_xcb_dri3, dep_xcb_present, dep_xcb_sync, -- dep_xshmfence, dep_xcb_xfixes] -+ dep_xshmfence, dep_xcb_xfixes, dep_xcb_xrandr, idep_mesautil] - files_libdri += files('loader_dri3_helper.c') - endif - endif