packages/anda/lib/mesa/bazzite.patch

From 21b062a757a202dcb737d40442b6145c34bb1e48 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Fri, 14 Jan 2022 15:58:45 +0100
Subject: [PATCH 01/11] STEAMOS: radv: min image count override for FH5

Otherwise in combination with the vblank time reservation in
gamescope the game could get stuck in low power states.
---
 src/util/00-radv-defaults.conf | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/util/00-radv-defaults.conf b/src/util/00-radv-defaults.conf
index b82e8d4da4d..c8d059571ad 100644
--- a/src/util/00-radv-defaults.conf
+++ b/src/util/00-radv-defaults.conf
@@ -234,5 +234,9 @@ Application bugs worked around in this file:
         <application name="Total War: WARHAMMER III" application_name_match="TotalWarhammer3">
             <option name="radv_disable_depth_storage" value="true"/>
         </application>
+
+        <application name="Forza Horizon 5" application_name_match="ForzaHorizon5.exe">
+            <option name="vk_x11_override_min_image_count" value="4" />
+        </application>
     </device>
 </driconf>
--
2.50.1


From e837814b4f33e48eaf6a79975cb738da39ed0fd2 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Thu, 22 Feb 2024 22:32:45 +0100
Subject: [PATCH 02/11] STEAMOS: Dynamic swapchain override for gamescope
 limiter for DRI3 only

The original patch (from Bas) contained WSI VK support too but it's
been removed because the Gamescope WSI layer already handles that.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
---
 .../frontends/dri/loader_dri3_helper.c        | 42 ++++++++++++++++++-
 .../frontends/dri/loader_dri3_helper.h        |  1 +
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/src/gallium/frontends/dri/loader_dri3_helper.c b/src/gallium/frontends/dri/loader_dri3_helper.c
index a795d45ce29..435ea2405a8 100644
--- a/src/gallium/frontends/dri/loader_dri3_helper.c
+++ b/src/gallium/frontends/dri/loader_dri3_helper.c
@@ -297,6 +297,30 @@ dri3_update_max_num_back(struct loader_dri3_drawable *draw)
    }
 }

+/* Reads the 32-bit override stored in $GAMESCOPE_LIMITER_FILE; 0 if unset or unreadable. */
+static unsigned
+gamescope_swapchain_override(void)
+{
+   static simple_mtx_t mtx = SIMPLE_MTX_INITIALIZER;
+   static int fd = -1;
+   uint32_t override_value = 0;
+   const char *path = getenv("GAMESCOPE_LIMITER_FILE");
+   if (!path)
+      return 0;
+
+   /* Open lazily and snapshot the descriptor while still holding the lock. */
+   simple_mtx_lock(&mtx);
+   if (fd < 0)
+      fd = open(path, O_RDONLY | O_CLOEXEC);
+   const int limiter_fd = fd;
+   simple_mtx_unlock(&mtx);
+   if (limiter_fd < 0)
+      return 0;
+
+   ssize_t got = pread(limiter_fd, &override_value, sizeof(override_value), 0);
+   return got == (ssize_t)sizeof(override_value) ? override_value : 0;
+}
+
 void
 loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
 {
@@ -311,10 +335,12 @@ loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
     * PS. changing from value A to B and A < B won't cause swap out of order but
     * may still gets wrong target_msc value at the beginning.
     */
-   if (draw->swap_interval != interval)
+   if (draw->orig_swap_interval != interval)
       loader_dri3_swapbuffer_barrier(draw);

-   draw->swap_interval = interval;
+   draw->orig_swap_interval = interval;
+   if (gamescope_swapchain_override() != 1)
+      draw->swap_interval = interval;
 }

 static void
@@ -443,6 +469,12 @@ loader_dri3_drawable_init(xcb_connection_t *conn,

    draw->swap_interval = dri_get_initial_swap_interval(draw->dri_screen_render_gpu);

+   draw->orig_swap_interval = draw->swap_interval;
+
+   unsigned gamescope_override = gamescope_swapchain_override();
+   if (gamescope_override == 1)
+      draw->swap_interval = 1;
+
    dri3_update_max_num_back(draw);

    /* Create a new drawable */
@@ -1085,6 +1117,12 @@ loader_dri3_swap_buffers_msc(struct loader_dri3_drawable *draw,
    if (draw->type == LOADER_DRI3_DRAWABLE_WINDOW) {
       dri3_fence_reset(draw->conn, back);

+      unsigned gamescope_override = gamescope_swapchain_override();
+      if (gamescope_override == 1)
+         draw->swap_interval = 1;
+      else
+         draw->swap_interval = draw->orig_swap_interval;
+
       /* Compute when we want the frame shown by taking the last known
        * successful MSC and adding in a swap interval for each outstanding swap
        * request. target_msc=divisor=remainder=0 means "Use glXSwapBuffers()
diff --git a/src/gallium/frontends/dri/loader_dri3_helper.h b/src/gallium/frontends/dri/loader_dri3_helper.h
index 26f138d1b83..3f0f3f66fac 100644
--- a/src/gallium/frontends/dri/loader_dri3_helper.h
+++ b/src/gallium/frontends/dri/loader_dri3_helper.h
@@ -169,6 +169,7 @@ struct loader_dri3_drawable {
    bool block_on_depleted_buffers;
    bool queries_buffer_age;
    int swap_interval;
+   int orig_swap_interval;

    const struct loader_dri3_vtable *vtable;

--
2.50.1


From 354cf8783e49b082c97982f2e5be305ad6e4ab50 Mon Sep 17 00:00:00 2001
From: Antheas Kapenekakis <git@antheas.dev>
Date: Sat, 15 Mar 2025 16:39:25 +0100
Subject: [PATCH 03/11] [BEGIN] SteamOS Backports

--
2.50.1


From c5a4eab20075dfa2f2bdfb87e55ecec262ef00f6 Mon Sep 17 00:00:00 2001
From: Antheas Kapenekakis <git@antheas.dev>
Date: Sat, 15 Mar 2025 16:39:33 +0100
Subject: [PATCH 04/11] [BEGIN] Our Mesa backports

--
2.50.1


From 221b11df6d9cd7b66c8502fa51d8d72cfc377e5e Mon Sep 17 00:00:00 2001
From: Antheas Kapenekakis <git@antheas.dev>
Date: Mon, 24 Mar 2025 19:50:51 +0100
Subject: [PATCH 05/11] Revert "winsys/amdgpu: use VM_ALWAYS_VALID for all VRAM
 and GTT allocations"

This reverts commit 8c91624614c1f939974fe0d2d1a3baf83335cecb.

Messes with AutoVRAM, who would have thought?
---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index d5646e9660b..a51348b44a8 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -624,11 +624,6 @@ static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *aws,
    if (flags & RADEON_FLAG_GTT_WC)
       request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

-   if (aws->info.has_vm_always_valid &&
-       initial_domain & (RADEON_DOMAIN_VRAM_GTT | RADEON_DOMAIN_DOORBELL) &&
-       flags & RADEON_FLAG_NO_INTERPROCESS_SHARING)
-      request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
-
    if (flags & RADEON_FLAG_DISCARDABLE &&
        aws->info.drm_minor >= 47)
       request.flags |= AMDGPU_GEM_CREATE_DISCARDABLE;
--
2.50.1


From cf8c0d66ed49f99d0d259c28fe72174d58c06de7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daniel@schuermann.dev>
Date: Mon, 24 Mar 2025 21:25:29 +0100
Subject: [PATCH 06/11] vulkan: implement VK_AMD_anti_lag as implicit vulkan
 layer

VkLayer_MESA_anti_lag is a lightweight implicit layer which provides
an open-source implementation of the VK_AMD_anti_lag vulkan extension.

The algorithm used by this layer is very simplistic and only aims to
minimize the delay between calls to vkQueueSubmit or vkQueueSubmit2
and the start of the submission's execution.

In order to build VkLayer_MESA_anti_lag, pass -Dlayers=anti-lag to meson.
It is possible to either install the layer or to use

 VK_ADD_IMPLICIT_LAYER_PATH=<buildpath>/share/vulkan/implicit_layer.d/

for testing purposes.
(Keep in mind that you have to adjust the library_path in the json file in that case.)

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34242>
---
 meson.build                                   |   1 +
 meson.options                                 |   2 +-
 .../anti-lag-layer/VkLayer_MESA_anti_lag.json |  26 +
 src/vulkan/anti-lag-layer/anti_lag_layer.c    | 590 ++++++++++++
 src/vulkan/anti-lag-layer/anti_lag_layer.h    | 111 +++
 .../anti-lag-layer/anti_lag_layer_interface.c | 899 ++++++++++++++++++
 src/vulkan/anti-lag-layer/meson.build         |  26 +
 src/vulkan/anti-lag-layer/ringbuffer.h        |  58 ++
 src/vulkan/meson.build                        |   3 +
 9 files changed, 1715 insertions(+), 1 deletion(-)
 create mode 100644 src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
 create mode 100644 src/vulkan/anti-lag-layer/anti_lag_layer.c
 create mode 100644 src/vulkan/anti-lag-layer/anti_lag_layer.h
 create mode 100644 src/vulkan/anti-lag-layer/anti_lag_layer_interface.c
 create mode 100644 src/vulkan/anti-lag-layer/meson.build
 create mode 100644 src/vulkan/anti-lag-layer/ringbuffer.h

diff --git a/meson.build b/meson.build
index 427cfde435c..c6c6457abae 100644
--- a/meson.build
+++ b/meson.build
@@ -95,6 +95,7 @@ with_vulkan_overlay_layer = get_option('vulkan-layers').contains('overlay')
 with_vulkan_device_select_layer = get_option('vulkan-layers').contains('device-select')
 with_vulkan_screenshot_layer = get_option('vulkan-layers').contains('screenshot')
 with_vulkan_vram_report_limit_layer = get_option('vulkan-layers').contains('vram-report-limit')
+with_vulkan_anti_lag_layer = get_option('vulkan-layers').contains('anti-lag')
 with_tools = get_option('tools')
 if with_tools.contains('all')
   with_tools = [
diff --git a/meson.options b/meson.options
index c3c02c4c94f..cd0e56cc429 100644
--- a/meson.options
+++ b/meson.options
@@ -299,7 +299,7 @@ option(
   type : 'array',
   value : [],
   choices : [
-    'device-select', 'intel-nullhw', 'overlay', 'screenshot',
+    'device-select', 'intel-nullhw', 'overlay', 'screenshot', 'anti-lag',
     'vram-report-limit',
   ],
   description : 'List of vulkan layers to build'
diff --git a/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json b/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
new file mode 100644
index 00000000000..4e2ab794c9e
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/VkLayer_MESA_anti_lag.json
@@ -0,0 +1,26 @@
+{
+   "file_format_version": "1.2.1",
+   "layer": {
+      "name": "VK_LAYER_MESA_anti_lag",
+      "type": "GLOBAL",
+      "library_path": "libVkLayer_MESA_anti_lag.so",
+      "api_version": "1.4.303",
+      "implementation_version": "1",
+      "description": "Open-source implementation of the VK_AMD_anti_lag extension.",
+      "functions": {
+         "vkNegotiateLoaderLayerInterfaceVersion": "anti_lag_NegotiateLoaderLayerInterfaceVersion"
+      },
+      "device_extensions": [
+         {
+            "name": "VK_AMD_anti_lag",
+            "spec_version": "1",
+            "entrypoints": [
+               "vkAntiLagUpdateAMD"
+            ]
+         }
+      ],
+      "disable_environment": {
+         "DISABLE_LAYER_MESA_ANTI_LAG": "1"
+      }
+   }
+}
\ No newline at end of file
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.c b/src/vulkan/anti-lag-layer/anti_lag_layer.c
new file mode 100644
index 00000000000..6c21e074024
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.c
@@ -0,0 +1,590 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "anti_lag_layer.h"
+#include <string.h>
+#include "util/os_time.h"
+#include "util/simple_mtx.h"
+#include "vulkan/vulkan_core.h"
+#include "ringbuffer.h"
+#include "vk_alloc.h"
+#include "vk_util.h"
+
+static bool
+evaluate_frame(device_context *ctx, frame *frame, bool force_wait)
+{
+   if (frame->state != FRAME_PRESENT) {
+      /* This frame is not finished yet. */
+      assert(!force_wait);
+      return false;
+   }
+
+   int query_flags = VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT;
+   const uint32_t frame_idx = ringbuffer_index(ctx->frames, frame);
+
+   /* Before we commit to completing a frame, all submits on all queues must have completed. */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+      ringbuffer_lock(queue_ctx->queries);
+      uint64_t expected_signal_value = queue_ctx->semaphore_value - queue_ctx->queries.size +
+                                       queue_ctx->submissions_per_frame[frame_idx];
+      ringbuffer_unlock(queue_ctx->queries);
+
+      if (force_wait) {
+         /* Wait for the frame's timeline semaphore. NOTE(review): timeout is 0 (poll only) -- confirm intended. */
+         struct VkSemaphoreWaitInfo wait_info = {
+            .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
+            .semaphoreCount = 1,
+            .pSemaphores = &queue_ctx->semaphore,
+            .pValues = &expected_signal_value,
+         };
+         ctx->vtable.WaitSemaphores(ctx->device, &wait_info, 0);
+      } else {
+         /* Return early if the last timeline semaphore of the frame has not been signaled yet. */
+         uint64_t signal_value;
+         ctx->vtable.GetSemaphoreCounterValue(ctx->device, queue_ctx->semaphore, &signal_value);
+         if (signal_value < expected_signal_value)
+            return false;
+      }
+   }
+
+   /* For each queue, retrieve timestamp query results. */
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      queue_context *queue_ctx = &ctx->queues[i];
+
+      /* As we hold a global mtx and this is the only place where queries are freed,
+       * we don't need to lock the query ringbuffer here in order to read the first entry.
+       */
+      struct query *query = ringbuffer_first(queue_ctx->queries);
+      uint32_t query_idx = ringbuffer_index(queue_ctx->queries, query);
+      int num_timestamps =
+         MIN2(queue_ctx->submissions_per_frame[frame_idx], MAX_QUERIES - query_idx);
+
+      while (num_timestamps > 0) {
+         /* Retrieve this batch of timestamps; WAIT_BIT in query_flags is set, so results are waited for. */
+         ctx->vtable.GetQueryPoolResults(ctx->device, queue_ctx->queryPool, query_idx,
+                                         num_timestamps, sizeof(uint64_t), &query->begin_gpu_ts,
+                                         sizeof(struct query), query_flags);
+
+         ringbuffer_lock(queue_ctx->queries);
+         for (unsigned j = 0; j < num_timestamps; j++) {
+
+            /* Calibrate device timestamps. */
+            query->begin_gpu_ts =
+               ctx->calibration.delta +
+               (uint64_t)(query->begin_gpu_ts * ctx->calibration.timestamp_period);
+            if (query->begin_gpu_ts > query->submit_cpu_ts)
+               frame->min_delay =
+                  MIN2(frame->min_delay, query->begin_gpu_ts - query->submit_cpu_ts);
+
+            /* Check if we can reset half of the query pool at once. */
+            uint32_t next_idx = ringbuffer_index(queue_ctx->queries, query) + 1;
+            const bool reset = next_idx == MAX_QUERIES || next_idx == MAX_QUERIES / 2;
+            if (reset) {
+               ringbuffer_unlock(queue_ctx->queries);
+               ctx->vtable.ResetQueryPool(ctx->device, queue_ctx->queryPool,
+                                          next_idx - MAX_QUERIES / 2, MAX_QUERIES / 2);
+               ringbuffer_lock(queue_ctx->queries);
+            }
+
+            /* Free query. */
+            ringbuffer_free(queue_ctx->queries, query);
+            queue_ctx->submissions_per_frame[frame_idx]--;
+
+            query = ringbuffer_first(queue_ctx->queries);
+         }
+
+         /* Ensure that the total number of queries across all frames is correct. */
+         ASSERTED uint32_t count = 0;
+         for (unsigned i = 0; i < MAX_FRAMES; i++)
+            count += queue_ctx->submissions_per_frame[i];
+         assert(count == queue_ctx->queries.size);
+
+         query_idx = ringbuffer_index(queue_ctx->queries, query);
+         num_timestamps =
+            MIN2(queue_ctx->submissions_per_frame[frame_idx], MAX_QUERIES - query_idx);
+
+         ringbuffer_unlock(queue_ctx->queries);
+      }
+   }
+
+   frame->min_delay++; /* wrap UINT64_MAX in case we didn't have any submissions. */
+
+   return true;
+}
+
+static bool
+calibrate_timestamps(device_context *ctx)
+{
+   uint64_t ts[2];
+   uint64_t deviation;
+
+   VkCalibratedTimestampInfoKHR info[2] = {
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
+         .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
+      },
+      {
+         .sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_KHR,
+         .timeDomain = VK_TIME_DOMAIN_DEVICE_KHR,
+      },
+   };
+
+   VkResult result = ctx->vtable.GetCalibratedTimestampsKHR(ctx->device, 2, info, ts, &deviation);
+   if (result == VK_SUCCESS) {
+      /* We take a moving average in order to avoid variance. */
+      int64_t new_delta = ts[0] - (int64_t)(ts[1] * ctx->calibration.timestamp_period);
+
+      if (ctx->calibration.delta == 0) {
+         ctx->calibration.delta = new_delta;
+      } else {
+         int64_t diff = new_delta - ctx->calibration.delta;
+         ctx->calibration.delta += diff / 8;
+      }
+
+      /* Take a new calibrated timestamp every second. */
+      ctx->calibration.recalibrate_when = ts[0] + 1000000000ull;
+   }
+
+   return result == VK_SUCCESS;
+}
+
+static void
+begin_next_frame(device_context *ctx)
+{
+   frame *next_frame;
+   if (ctx->active_frame) {
+      assert(ctx->active_frame->state == FRAME_SUBMIT);
+      ctx->active_frame->state = FRAME_PRESENT;
+      next_frame = ringbuffer_next(ctx->frames, ctx->active_frame);
+   } else {
+      next_frame = ringbuffer_last(ctx->frames);
+   }
+
+   /* If there is a frame ready, it becomes active. */
+   if (next_frame->state == FRAME_INPUT) {
+      next_frame->state = FRAME_SUBMIT;
+      ctx->active_frame = next_frame;
+   } else {
+      ctx->active_frame = NULL;
+   }
+}
+
+static void
+anti_lag_disable(device_context *ctx)
+{
+   ringbuffer_lock(ctx->frames);
+   while (ctx->frames.size) {
+      /* Set force-wait=true, so that all pending timestamp queries get completed. */
+      begin_next_frame(ctx);
+      frame *frame = ringbuffer_first(ctx->frames);
+      evaluate_frame(ctx, frame, true);
+      frame->state = FRAME_INVALID;
+      ringbuffer_free(ctx->frames, frame);
+   }
+   assert(!ctx->active_frame);
+   ringbuffer_unlock(ctx->frames);
+}
+
+#define TARGET_DELAY 4000000ll /* 4 ms */
+/**
+ * Returns the amount of time (in nanoseconds) that we want the next frame to be delayed.
+ *
+ * The algorithm used by this function is very simplistic and only aims
+ * to minimize the delay between calls to vkQueueSubmit or vkQueueSubmit2
+ * and the start of the submission's execution.
+ */
+static int64_t
+get_wait_time(device_context *ctx)
+{
+   /* Take the previously evaluated frame's delay as baseline. */
+   int64_t imposed_delay = ctx->base_delay;
+   int64_t adaptation = 0;
+
+   ringbuffer_lock(ctx->frames);
+   /* In case our ringbuffer is completely full and no frame is in PRESENT stage,
+    * just move the oldest frame to PRESENT stage, and force-wait.
+    */
+   bool force_wait = ctx->frames.size == MAX_FRAMES;
+   frame *next_frame = ringbuffer_first(ctx->frames);
+   if (force_wait && next_frame->state != FRAME_PRESENT)
+      begin_next_frame(ctx);
+
+   /* Also force-wait for the oldest frame if there are already 2 frames in PRESENT stage. */
+   force_wait |= ringbuffer_next(ctx->frames, next_frame)->state == FRAME_PRESENT;
+   ringbuffer_unlock(ctx->frames);
+
+   /* Take new evaluated frames into consideration. */
+   while (evaluate_frame(ctx, next_frame, force_wait)) {
+
+      if (next_frame->min_delay < TARGET_DELAY / 2 && ctx->adaptation <= 0) {
+         /* If there is (almost) no delay between submission and GPU start, halve the base
+          * delay and cancel it out for this frame, in order to account for sudden changes.
+          */
+         ctx->base_delay = ctx->base_delay / 2;
+         adaptation = -ctx->base_delay;
+      } else {
+         /* We use some kind of exponential weighted moving average function here,
+          * in order to determine a base-delay. We use a smoothing-factor of roughly
+          * 3%, but don't discount the previous value. This helps keeping the delay
+          * slightly below TARGET_DELAY (4 ms), most of the time.
+          */
+         int64_t diff = (int64_t)next_frame->min_delay - TARGET_DELAY;
+         ctx->base_delay = MAX2(0, ctx->base_delay + diff / 32); /* corresponds to ~3 % */
+
+         /* As the base-delay gets adjusted rather slowly, we additionally use half of the
+          * diff as adaptation delay to account for sudden changes. A quarter of the previous
+          * adaptation is subtracted from it, so that we can avoid overcompensation.
+          */
+         adaptation = diff / 2 - ctx->adaptation / 4;
+      }
+
+      /* We only need space for one frame. */
+      force_wait = false;
+
+      ringbuffer_lock(ctx->frames);
+      next_frame->state = FRAME_INVALID;
+      ringbuffer_free(ctx->frames, next_frame);
+      next_frame = ringbuffer_first(ctx->frames);
+      ringbuffer_unlock(ctx->frames);
+   }
+   imposed_delay = ctx->base_delay + adaptation;
+   ctx->adaptation = adaptation;
+
+   if (imposed_delay > 100000000) {
+      /* A delay above 100 ms corresponds to <10 FPS. Something might have gone wrong. */
+      calibrate_timestamps(ctx);
+      ctx->base_delay = ctx->adaptation = imposed_delay = 0;
+   }
+
+   return MAX2(0, imposed_delay);
+}
+
+static void
+reset_frame(frame *frame)
+{
+   assert(frame->state == FRAME_INVALID);
+   frame->frame_idx = 0;
+   frame->frame_start_time = 0;
+   frame->min_delay = UINT64_MAX;
+   frame->state = FRAME_INPUT;
+}
+
+VKAPI_ATTR void VKAPI_CALL
+anti_lag_AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD *pData)
+{
+   if (pData == NULL)
+      return;
+
+   device_context *ctx = get_device_context(device);
+   if (pData->mode == VK_ANTI_LAG_MODE_OFF_AMD) {
+      /* Application request to disable Anti-Lag. */
+      simple_mtx_lock(&ctx->mtx);
+      anti_lag_disable(ctx);
+      simple_mtx_unlock(&ctx->mtx);
+      return;
+   }
+
+   uint64_t frame_idx = 0;
+   int64_t now = os_time_get_nano();
+   int64_t imposed_delay = 0;
+   int64_t last_frame_begin = 0;
+
+   if (pData->pPresentationInfo) {
+      /* The same frameIndex value should be used with VK_ANTI_LAG_STAGE_INPUT_AMD before
+       * the frame begins and with VK_ANTI_LAG_STAGE_PRESENT_AMD when the frame ends.
+       */
+      frame_idx = pData->pPresentationInfo->frameIndex;
+
+      /* This marks the end of the current frame. */
+      if (pData->pPresentationInfo->stage == VK_ANTI_LAG_STAGE_PRESENT_AMD) {
+         /* If there is already a new frame pending, any submission that happens afterwards
+          * gets associated with the new frame.
+          */
+         ringbuffer_lock(ctx->frames);
+         /* Check that the currently active frame is indeed the frame we are ending now. */
+         while (ctx->active_frame && ctx->active_frame->frame_idx <= frame_idx) {
+            begin_next_frame(ctx);
+         }
+         ringbuffer_unlock(ctx->frames);
+         return;
+      }
+   }
+
+   /* Lock this function, in order to avoid race conditions on frame allocation. */
+   simple_mtx_lock(&ctx->mtx);
+
+   /* VK_ANTI_LAG_STAGE_INPUT_AMD: This marks the begin of a new frame.
+    * Evaluate previous frames in order to determine the wait time.
+    */
+   imposed_delay = get_wait_time(ctx);
+   int64_t next_deadline = now + imposed_delay;
+
+   /* Ensure maxFPS adherence. */
+   if (pData->maxFPS) {
+      int64_t frametime_period = 1000000000u / pData->maxFPS;
+      last_frame_begin = ringbuffer_last(ctx->frames)->frame_start_time;
+      next_deadline = MAX2(next_deadline, last_frame_begin + frametime_period);
+   }
+
+   /* Recalibrate every now and then. */
+   if (next_deadline > ctx->calibration.recalibrate_when)
+      calibrate_timestamps(ctx);
+
+   /* Sleep until deadline is met. */
+   os_time_nanosleep_until(next_deadline);
+
+   /* Initialize new frame. */
+   ringbuffer_lock(ctx->frames);
+   frame *new_frame = ringbuffer_alloc(ctx->frames);
+   reset_frame(new_frame);
+   new_frame->frame_start_time = next_deadline;
+   new_frame->imposed_delay = imposed_delay;
+   new_frame->frame_idx = frame_idx;
+
+   /* Immediately set the frame active if there is no other frame already active. */
+   if (!ctx->active_frame)
+      begin_next_frame(ctx);
+
+   ringbuffer_unlock(ctx->frames);
+   simple_mtx_unlock(&ctx->mtx);
+}
+
+static queue_context *
+get_queue_context(device_context *ctx, VkQueue queue)
+{
+   for (unsigned i = 0; i < ctx->num_queues; i++) {
+      if (ctx->queues[i].queue == queue)
+         return &ctx->queues[i];
+   }
+
+   return NULL;
+}
+
+static struct query *
+allocate_query(device_context *ctx, queue_context *queue_ctx)
+{
+   if (!ctx->active_frame)
+      return NULL;
+
+   /* Refuse once a single frame already holds more than half of the query pool. */
+   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+   if (queue_ctx->submissions_per_frame[frame_idx] > MAX_QUERIES / 2)
+      return NULL;
+
+   /* Check that the next query index has been reset properly:
+    *
+    * We use some double-buffering here in order to reduce the number of
+    * vkResetQueryPool calls.
+    * Return NULL if the next query-index allocation would cross into the half
+    * which still contains active queries.
+    */
+   if (queue_ctx->queries.size > MAX_QUERIES / 2) {
+      struct query *last_query = ringbuffer_last(queue_ctx->queries);
+      uint32_t next_idx = ringbuffer_index(queue_ctx->queries, last_query) + 1;
+      if (next_idx == MAX_QUERIES || next_idx == MAX_QUERIES / 2)
+         return NULL;
+   }
+
+   return ringbuffer_alloc(queue_ctx->queries);
+}
+
+static bool
+get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer *cmdbuffer)
+{
+   uint64_t now = os_time_get_nano();
+
+   /* Begin critical section. */
+   ringbuffer_lock(ctx->frames);
+   ringbuffer_lock(queue_ctx->queries);
+   struct query *query = allocate_query(ctx, queue_ctx);
+   if (query == NULL) {
+      ringbuffer_unlock(queue_ctx->queries);
+      ringbuffer_unlock(ctx->frames);
+      return false;
+   }
+
+   query->submit_cpu_ts = now;
+
+   /* Assign commandBuffer for timestamp. */
+   *cmdbuffer = query->cmdbuffer;
+
+   /* Increment timeline semaphore count. */
+   queue_ctx->semaphore_value++;
+
+   /* Add new submission entry for the current frame */
+   assert(ctx->active_frame->state == FRAME_SUBMIT);
+   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+   queue_ctx->submissions_per_frame[frame_idx]++;
+
+   ringbuffer_unlock(queue_ctx->queries);
+   ringbuffer_unlock(ctx->frames);
+   return true;
+}
+
+static VkResult
+queue_submit2(device_context *ctx, VkQueue queue, uint32_t submitCount,
+              const VkSubmitInfo2 *pSubmits, VkFence fence, PFN_vkQueueSubmit2 queueSubmit2)
+{
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (!ctx->active_frame || !queue_ctx)
+      return queueSubmit2(queue, submitCount, pSubmits, fence);
+
+   int first = -1;
+   VkCommandBuffer timestamp_cmdbuffer;
+   /* Find the first submission that contains commandbuffers; only that one gets patched. */
+   for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].commandBufferInfoCount) {
+         first = i;
+         break;
+      }
+   }
+
+   /* Without commandbuffers or an available timestamp query, forward the call unmodified. */
+   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+      return queueSubmit2(queue, submitCount, pSubmits, fence);
+
+   VkSubmitInfo2 *submits;
+   VkCommandBufferSubmitInfo *cmdbuffers;
+   VkSemaphoreSubmitInfo *semaphores;
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &submits, VkSubmitInfo2, submitCount);
+   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBufferSubmitInfo,
+                     pSubmits[first].commandBufferInfoCount + 1);
+   vk_multialloc_add(&ma, &semaphores, VkSemaphoreSubmitInfo,
+                     pSubmits[first].signalSemaphoreInfoCount + 1);
+   void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buf)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   memcpy(submits, pSubmits, sizeof(VkSubmitInfo2) * submitCount);
+   VkSubmitInfo2 *submit_info = &submits[first];
+
+   /* Prepend the timestamp commandbuffer to the submission's own commandbuffers. */
+   cmdbuffers[0] = (VkCommandBufferSubmitInfo){
+      .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_SUBMIT_INFO,
+      .commandBuffer = timestamp_cmdbuffer,
+   };
+   memcpy(&cmdbuffers[1], submit_info->pCommandBufferInfos,
+          sizeof(VkCommandBufferSubmitInfo) * submit_info->commandBufferInfoCount);
+   submit_info->pCommandBufferInfos = cmdbuffers;
+   submit_info->commandBufferInfoCount++;
+
+   /* Append our timeline semaphore signal; VkSemaphoreSubmitInfo has its own sType. */
+   memcpy(semaphores, submit_info->pSignalSemaphoreInfos,
+          sizeof(VkSemaphoreSubmitInfo) * submit_info->signalSemaphoreInfoCount);
+   semaphores[submit_info->signalSemaphoreInfoCount] = (VkSemaphoreSubmitInfo){
+      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO,
+      .semaphore = queue_ctx->semaphore,
+      .value = queue_ctx->semaphore_value,
+      .stageMask = VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
+   };
+   submit_info->pSignalSemaphoreInfos = semaphores;
+   submit_info->signalSemaphoreInfoCount++;
+
+   /* Submit the patched copy, then release the single backing allocation. */
+   VkResult res = queueSubmit2(queue, submitCount, submits, fence);
+   vk_free(&ctx->alloc, buf);
+   return res;
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit2KHR(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                         VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   return queue_submit2(ctx, queue, submitCount, pSubmits, fence, ctx->vtable.QueueSubmit2KHR);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                      VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   return queue_submit2(ctx, queue, submitCount, pSubmits, fence, ctx->vtable.QueueSubmit2);
+}
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
+                     VkFence fence)
+{
+   device_context *ctx = get_device_context(queue);
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (!ctx->active_frame || !queue_ctx)
+      return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+
+   int first = -1;
+   VkCommandBuffer timestamp_cmdbuffer;
+   /* Check if any submission contains commandbuffers. */
+   for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].commandBufferCount) {
+         first = i;
+         break;
+      }
+   }
+
+   /* Get timestamp commandbuffer. */
+   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+      return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+
+   VkSubmitInfo *submits;
+   VkCommandBuffer *cmdbuffers;
+   VkSemaphore *semaphores;
+   VkTimelineSemaphoreSubmitInfo *semaphore_info;
+   uint64_t *semaphore_values;
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &submits, VkSubmitInfo, submitCount);
+   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBuffer, pSubmits[first].commandBufferCount + 1);
+   vk_multialloc_add(&ma, &semaphores, VkSemaphore, pSubmits[first].signalSemaphoreCount + 1);
+   vk_multialloc_add(&ma, &semaphore_info, VkTimelineSemaphoreSubmitInfo, 1);
+   vk_multialloc_add(&ma, &semaphore_values, uint64_t, pSubmits[first].signalSemaphoreCount + 1);
+   void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buf)
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+   memcpy(submits, pSubmits, sizeof(VkSubmitInfo) * submitCount);
+   VkSubmitInfo *submit_info = &submits[first];
+
+   /* Add commandbuffer to submission. */
+   cmdbuffers[0] = timestamp_cmdbuffer;
+   memcpy(&cmdbuffers[1], submit_info->pCommandBuffers,
+          sizeof(VkCommandBuffer) * submit_info->commandBufferCount);
+   submit_info->pCommandBuffers = cmdbuffers;
+   submit_info->commandBufferCount++;
+
+   /* Add timeline semaphore to submission. */
+   const VkTimelineSemaphoreSubmitInfo *tlssi =
+      vk_find_struct_const(pSubmits[first].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+   semaphores[0] = queue_ctx->semaphore;
+   memcpy(&semaphores[1], submit_info->pSignalSemaphores,
+          sizeof(VkSemaphore) * submit_info->signalSemaphoreCount);
+   submit_info->pSignalSemaphores = semaphores;
+   submit_info->signalSemaphoreCount++;
+   semaphore_values[0] = queue_ctx->semaphore_value;
+   if (tlssi) {
+      *semaphore_info = *tlssi; /* save original values */
+      memcpy(&semaphore_values[1], tlssi->pSignalSemaphoreValues,
+             sizeof(uint64_t) * tlssi->signalSemaphoreValueCount);
+      ((VkTimelineSemaphoreSubmitInfo *)tlssi)->pSignalSemaphoreValues = semaphore_values;
+      ((VkTimelineSemaphoreSubmitInfo *)tlssi)->signalSemaphoreValueCount =
+         submit_info->signalSemaphoreCount;
+   } else {
+      *semaphore_info = (VkTimelineSemaphoreSubmitInfo){
+         .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
+         .pNext = submit_info->pNext,
+         .signalSemaphoreValueCount = submit_info->signalSemaphoreCount,
+         .pSignalSemaphoreValues = semaphore_values,
+      };
+      submit_info->pNext = semaphore_info;
+   }
+
+   /* Submit with added timestamp query commandbuffer. */
+   VkResult res = ctx->vtable.QueueSubmit(queue, submitCount, submits, fence);
+   if (tlssi)
+      *(VkTimelineSemaphoreSubmitInfo *)tlssi = *semaphore_info; /* restore */
+   vk_free(&ctx->alloc, buf);
+   return res;
+}
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.h b/src/vulkan/anti-lag-layer/anti_lag_layer.h
new file mode 100644
index 00000000000..31abb0f9aee
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef ANTI_LAG_LAYER_H
+#define ANTI_LAG_LAYER_H
+
+#include "util/simple_mtx.h"
+#include "vulkan/vk_layer.h"
+#include "vulkan/vulkan_core.h"
+#include "ringbuffer.h"
+
+#define MAX_FRAMES  8
+#define MAX_QUERIES 256
+
+enum frame_state {
+   FRAME_INVALID = 0, /* Zero value, i.e. what a zero-initialized frame holds. */
+   FRAME_INPUT,   /* Frame is in input stage. */
+   FRAME_SUBMIT,  /* All current queueSubmit calls are associated with this frame. */
+   FRAME_PRESENT, /* Frame is in present stage and latencies can be evaluated. */
+};
+
+typedef struct frame { /* Element type of device_context::frames (MAX_FRAMES entries). */
+   uint64_t frame_idx;
+   uint64_t frame_start_time; /* NOTE(review): clock domain/units are not visible here. */
+   uint64_t min_delay;
+   uint64_t imposed_delay;
+   enum frame_state state; /* Stage the frame is currently in, see enum frame_state. */
+} frame;
+
+struct query { /* Element type of queue_context::queries (MAX_QUERIES entries). */
+   uint64_t begin_gpu_ts;
+   uint64_t submit_cpu_ts;
+   VkCommandBuffer cmdbuffer; /* Pre-recorded timestamp write for this query slot. */
+};
+
+typedef struct queue_context {
+   VkQueue queue;
+   uint32_t queue_family_idx;
+   VkCommandPool cmdPool; /* Pool the per-query command buffers are allocated from. */
+   VkQueryPool queryPool; /* Timestamp query pool with MAX_QUERIES queries. */
+   VkSemaphore semaphore; /* Timeline semaphore, created with initial value 0. */
+   uint64_t semaphore_value; /* Value passed as signal value for `semaphore` on submit. */
+   uint8_t submissions_per_frame[MAX_FRAMES];
+   RINGBUFFER_DECLARE(queries, struct query, MAX_QUERIES);
+} queue_context;
+
+typedef struct device_context {
+   /* Entry points of the next layer/driver; filled in by init_device_vtable(). */
+   struct DeviceDispatchTable {
+#define DECLARE_HOOK(fn) PFN_vk##fn fn
+      DECLARE_HOOK(GetDeviceProcAddr);
+      DECLARE_HOOK(SetDeviceLoaderData);
+      DECLARE_HOOK(DestroyDevice);
+      DECLARE_HOOK(QueueSubmit);
+      DECLARE_HOOK(QueueSubmit2);
+      DECLARE_HOOK(QueueSubmit2KHR);
+      DECLARE_HOOK(GetDeviceQueue);
+      DECLARE_HOOK(CreateCommandPool);
+      DECLARE_HOOK(DestroyCommandPool);
+      DECLARE_HOOK(CreateQueryPool);
+      DECLARE_HOOK(ResetQueryPool);
+      DECLARE_HOOK(DestroyQueryPool);
+      DECLARE_HOOK(GetQueryPoolResults);
+      DECLARE_HOOK(AllocateCommandBuffers);
+      DECLARE_HOOK(FreeCommandBuffers);
+      DECLARE_HOOK(BeginCommandBuffer);
+      DECLARE_HOOK(EndCommandBuffer);
+      DECLARE_HOOK(GetCalibratedTimestampsKHR);
+      DECLARE_HOOK(CmdWriteTimestamp);
+      DECLARE_HOOK(CreateSemaphore);
+      DECLARE_HOOK(DestroySemaphore);
+      DECLARE_HOOK(GetSemaphoreCounterValue);
+      DECLARE_HOOK(WaitSemaphores);
+#undef DECLARE_HOOK
+   } vtable;
+
+   VkDevice device;
+   VkAllocationCallbacks alloc; /* Copy of the allocator chosen at device creation. */
+   simple_mtx_t mtx;
+
+   struct {
+      int64_t delta;
+      uint64_t recalibrate_when;
+      float timestamp_period; /* VkPhysicalDeviceLimits::timestampPeriod of the device. */
+   } calibration;
+
+   RINGBUFFER_DECLARE(frames, frame, MAX_FRAMES);
+   frame *active_frame;
+   int64_t base_delay;
+   int64_t adaptation;
+
+   unsigned num_queues; /* Number of entries in queues[]. */
+   queue_context queues[]; /* Flexible array; one entry per instrumented queue. */
+} device_context;
+
+device_context *get_device_context(const void *object);
+
+void anti_lag_AntiLagUpdateAMD(VkDevice device, const VkAntiLagDataAMD *pData);
+VkResult anti_lag_QueueSubmit2KHR(VkQueue queue, uint32_t submitCount,
+                                  const VkSubmitInfo2 *pSubmits, VkFence fence);
+VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubmitInfo2 *pSubmits,
+                               VkFence fence);
+VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
+                              VkFence fence);
+
+VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct);
+
+#endif /* ANTI_LAG_LAYER_H */
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c
new file mode 100644
index 00000000000..d2ca4a7dd44
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c
@@ -0,0 +1,899 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "util/simple_mtx.h"
+#include "vulkan/vk_layer.h"
+#include "vulkan/vulkan_core.h"
+#include "anti_lag_layer.h"
+#include "vk_alloc.h"
+#include "vk_util.h"
+
+static uintptr_t
+object_to_key(const void *object)
+{
+   return *(const uintptr_t *)object; /* key = first pointer-sized word stored in the object */
+}
+
+typedef struct instance_data {
+   struct InstanceDispatchTable {
+#define DECLARE_HOOK(fn) PFN_vk##fn fn
+      DECLARE_HOOK(GetInstanceProcAddr);
+      DECLARE_HOOK(CreateInstance);
+      DECLARE_HOOK(DestroyInstance);
+      DECLARE_HOOK(CreateDevice);
+      DECLARE_HOOK(EnumerateDeviceExtensionProperties);
+      DECLARE_HOOK(GetPhysicalDeviceFeatures2KHR);
+      DECLARE_HOOK(GetPhysicalDeviceFeatures2);
+      DECLARE_HOOK(GetPhysicalDeviceProperties);
+      DECLARE_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
+      DECLARE_HOOK(GetPhysicalDeviceCalibrateableTimeDomainsKHR);
+      DECLARE_HOOK(GetPhysicalDeviceQueueFamilyProperties);
+#undef DECLARE_HOOK
+   } vtable; /* Next-layer instance entry points, see init_instance_vtable(). */
+
+   VkInstance instance;
+   uint32_t apiVersion; /* Application's apiVersion, VK_API_VERSION_1_0 if unspecified. */
+   VkAllocationCallbacks alloc; /* Copy of the allocator this struct was allocated with. */
+   struct instance_data *next; /* Next entry in instance_list. */
+} instance_data;
+
+static void
+init_instance_vtable(instance_data *ctx, PFN_vkGetInstanceProcAddr gpa)
+{
+#define RESOLVE(fn) ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->instance, "vk" #fn)
+   RESOLVE(CreateInstance);
+   RESOLVE(DestroyInstance);
+   RESOLVE(CreateDevice);
+   RESOLVE(EnumerateDeviceExtensionProperties);
+   RESOLVE(GetPhysicalDeviceFeatures2KHR);
+   RESOLVE(GetPhysicalDeviceFeatures2);
+   RESOLVE(GetPhysicalDeviceProperties);
+   RESOLVE(GetPhysicalDeviceCalibrateableTimeDomainsEXT);
+   RESOLVE(GetPhysicalDeviceCalibrateableTimeDomainsKHR);
+   RESOLVE(GetPhysicalDeviceQueueFamilyProperties);
+#undef RESOLVE
+   ctx->vtable.GetInstanceProcAddr = gpa; /* stored directly, not looked up by name */
+}
+
+static simple_mtx_t instance_mtx = SIMPLE_MTX_INITIALIZER; /* guards instance_list */
+static instance_data *instance_list = NULL; /* singly linked through instance_data::next */
+
+/* Append `instance` to the tail of instance_list while holding instance_mtx. */
+static void
+add_instance(instance_data *instance)
+{
+   simple_mtx_lock(&instance_mtx);
+   instance_data **link;
+   for (link = &instance_list; *link != NULL; link = &(*link)->next) {}
+   *link = instance;
+   simple_mtx_unlock(&instance_mtx);
+}
+
+static instance_data *
+remove_instance(const void *object)
+{
+   const uintptr_t wanted = object_to_key(object);
+   simple_mtx_lock(&instance_mtx);
+   instance_data **link = &instance_list;
+   while (*link != NULL && object_to_key((*link)->instance) != wanted)
+      link = &(*link)->next;
+   instance_data *found = *link; /* NULL when the end of the list was reached */
+   if (found != NULL)
+      *link = found->next; /* unlink the matching entry */
+   simple_mtx_unlock(&instance_mtx);
+   return found;
+}
+
+/* Look up the instance_data whose VkInstance has the same key as `object` (NULL if none). */
+static instance_data *
+get_instance_data(const void *object)
+{
+   const uintptr_t wanted = object_to_key(object);
+   simple_mtx_lock(&instance_mtx);
+   instance_data *it = instance_list;
+   for (; it != NULL && object_to_key(it->instance) != wanted; it = it->next) {}
+   simple_mtx_unlock(&instance_mtx);
+   return it;
+}
+
+/* vkCreateInstance hook: creates the next-layer instance and registers a per-instance context. */
+static VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+                        const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
+{
+   VkLayerInstanceCreateInfo *chain_info = (VkLayerInstanceCreateInfo *)(pCreateInfo->pNext);
+   while (chain_info && !(chain_info->sType == VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO &&
+                          chain_info->function == VK_LAYER_LINK_INFO)) {
+      chain_info = (VkLayerInstanceCreateInfo *)(chain_info->pNext);
+   }
+
+   assert(chain_info && chain_info->u.pLayerInfo);
+   PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr =
+      chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+   PFN_vkCreateInstance fpCreateInstance =
+      (PFN_vkCreateInstance)fpGetInstanceProcAddr(NULL, "vkCreateInstance");
+   if (fpCreateInstance == NULL)
+      return VK_ERROR_INITIALIZATION_FAILED;
+
+   /* Advance the link info for the next element on the chain. */
+   chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
+
+   /* Create the instance through the next element of the chain. */
+   VkResult result = fpCreateInstance(pCreateInfo, pAllocator, pInstance);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Create Instance context, falling back to the default allocator if none was given. */
+   const VkAllocationCallbacks *alloc = pAllocator ? pAllocator : vk_default_allocator();
+   instance_data *ctx = vk_alloc(alloc, sizeof(*ctx), alignof(instance_data),
+                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!ctx) {
+      /* Destroy with the caller's pAllocator (possibly NULL), matching how it was created. */
+      PFN_vkDestroyInstance fpDestroyInstance =
+         (PFN_vkDestroyInstance)fpGetInstanceProcAddr(*pInstance, "vkDestroyInstance");
+      fpDestroyInstance(*pInstance, pAllocator);
+      return VK_ERROR_OUT_OF_HOST_MEMORY;
+   }
+   ctx->apiVersion = pCreateInfo->pApplicationInfo && pCreateInfo->pApplicationInfo->apiVersion
+                        ? pCreateInfo->pApplicationInfo->apiVersion
+                        : VK_API_VERSION_1_0;
+   ctx->instance = *pInstance;
+   ctx->alloc = *alloc;
+   ctx->next = NULL;
+   init_instance_vtable(ctx, fpGetInstanceProcAddr);
+   add_instance(ctx);
+   return VK_SUCCESS;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+anti_lag_DestroyInstance(VkInstance instance, const VkAllocationCallbacks *pAllocator)
+{
+   instance_data *data = remove_instance(instance);
+   if (data == NULL)
+      return; /* not in instance_list: nothing is forwarded or freed */
+   data->vtable.DestroyInstance(instance, pAllocator);
+   vk_free(&data->alloc, data);
+}
+
+typedef struct device_data { /* One entry of device_list, created per vkCreateDevice call. */
+   VkDevice device;
+   PFN_vkGetDeviceProcAddr GetDeviceProcAddr; /* Next element's vkGetDeviceProcAddr. */
+   device_context *ctx; /* NULL if anti-lag ext is not enabled. */
+   struct device_data *next; /* Next entry in device_list. */
+} device_data;
+
+static void
+init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDeviceLoaderData sld,
+                   bool calibrated_timestamps_khr, bool host_query_reset_ext,
+                   bool timeline_semaphore_khr)
+{ /* INIT_HOOK_ALIAS looks up "vk<alias>" when `cond` is true and "vk<fn>" otherwise. */
+   ctx->vtable.GetDeviceProcAddr = gpa; /* stored directly, not looked up by name */
+   ctx->vtable.SetDeviceLoaderData = sld; /* supplied by the caller, not resolved via gpa */
+#define INIT_HOOK(fn) ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->device, "vk" #fn)
+#define INIT_HOOK_ALIAS(fn, alias, cond)                                                           \
+   ctx->vtable.fn = (PFN_vk##fn)gpa(ctx->device, cond ? "vk" #alias : "vk" #fn)
+   INIT_HOOK(DestroyDevice);
+   INIT_HOOK(QueueSubmit);
+   INIT_HOOK(QueueSubmit2);
+   INIT_HOOK(QueueSubmit2KHR);
+   INIT_HOOK(GetDeviceQueue);
+   INIT_HOOK(CreateCommandPool);
+   INIT_HOOK(DestroyCommandPool);
+   INIT_HOOK(CreateQueryPool);
+   INIT_HOOK_ALIAS(ResetQueryPool, ResetQueryPoolEXT, host_query_reset_ext);
+   INIT_HOOK(DestroyQueryPool);
+   INIT_HOOK(GetQueryPoolResults);
+   INIT_HOOK(AllocateCommandBuffers);
+   INIT_HOOK(FreeCommandBuffers);
+   INIT_HOOK(BeginCommandBuffer);
+   INIT_HOOK(EndCommandBuffer);
+   INIT_HOOK_ALIAS(GetCalibratedTimestampsKHR, GetCalibratedTimestampsEXT, !calibrated_timestamps_khr);
+   INIT_HOOK(CmdWriteTimestamp);
+   INIT_HOOK(CreateSemaphore);
+   INIT_HOOK(DestroySemaphore);
+   INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr);
+   INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr);
+#undef INIT_HOOK
+#undef INIT_HOOK_ALIAS
+}
+
+static simple_mtx_t device_mtx = SIMPLE_MTX_INITIALIZER; /* guards device_list */
+static device_data *device_list = NULL; /* singly linked through device_data::next */
+
+/* Append `device` to the tail of device_list while holding device_mtx. */
+static void
+add_device(device_data *device)
+{
+   simple_mtx_lock(&device_mtx);
+   device_data **link;
+   for (link = &device_list; *link != NULL; link = &(*link)->next) {}
+   *link = device;
+   simple_mtx_unlock(&device_mtx);
+}
+
+static device_data *
+remove_device(const void *object)
+{
+   const uintptr_t wanted = object_to_key(object);
+   simple_mtx_lock(&device_mtx);
+   device_data **link = &device_list;
+   while (*link != NULL && object_to_key((*link)->device) != wanted)
+      link = &(*link)->next;
+   device_data *found = *link; /* NULL when the end of the list was reached */
+   if (found != NULL)
+      *link = found->next; /* unlink the matching entry */
+   simple_mtx_unlock(&device_mtx);
+   return found;
+}
+
+/* Look up the device_data whose VkDevice has the same key as `object` (NULL if none). */
+static device_data *
+get_device_data(const void *object)
+{
+   const uintptr_t wanted = object_to_key(object);
+   simple_mtx_lock(&device_mtx);
+   device_data *it = device_list;
+   for (; it != NULL && object_to_key(it->device) != wanted; it = it->next) {}
+   simple_mtx_unlock(&device_mtx);
+   return it;
+}
+
+device_context *
+get_device_context(const void *object)
+{
+   device_data *data = get_device_data(object);
+   assert(data && data->ctx); /* only valid for objects of devices that have a context */
+   return data->ctx;
+}
+
+static VkLayerDeviceCreateInfo *
+get_device_chain_info(const VkDeviceCreateInfo *pCreateInfo, VkLayerFunction func)
+{
+   vk_foreach_struct_const (item, pCreateInfo->pNext) {
+      VkLayerDeviceCreateInfo *info = (VkLayerDeviceCreateInfo *)item;
+      if (item->sType == VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO && info->function == func)
+         return info;
+   }
+   return NULL; /* the chain holds no loader device create info for `func` */
+}
+
+/* Returns true when the app enables antiLag and the implementation below lacks VK_AMD_anti_lag. */
+static bool
+should_enable_layer(instance_data *ctx, VkPhysicalDevice physicalDevice,
+                    VkPhysicalDeviceAntiLagFeaturesAMD ext_feature)
+{
+   /* The extension is not requested by the application. */
+   if (!ext_feature.antiLag)
+      return false;
+
+   /* Ensure that the underlying implementation does not expose VK_AMD_anti_lag itself.
+    * ext_feature is a by-value copy whose pNext still points into the application's chain;
+    * detach it so that the feature query below cannot write into the application's structs. */
+   ext_feature.antiLag = false;
+   ext_feature.pNext = NULL;
+   VkPhysicalDeviceFeatures2 features = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+      .pNext = &ext_feature,
+   };
+
+   PFN_vkGetPhysicalDeviceFeatures2 query = ctx->vtable.GetPhysicalDeviceFeatures2KHR
+                                               ? ctx->vtable.GetPhysicalDeviceFeatures2KHR
+                                               : ctx->vtable.GetPhysicalDeviceFeatures2;
+   if (!query)
+      return false;
+   query(physicalDevice, &features);
+   return !ext_feature.antiLag;
+}
+
+/* True if VK_{KHR,EXT}_calibrated_timestamps is exposed; *has_khr reports the KHR flavour. */
+static bool
+check_calibrated_timestamps(instance_data *data, VkPhysicalDevice physicalDevice, bool *has_khr)
+{
+   *has_khr = false; /* always define the out-parameter, even on the early return below */
+   uint32_t count = 0;
+   VkResult res =
+      data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, NULL, &count, NULL);
+   VkExtensionProperties *extensions =
+      vk_alloc(&data->alloc, count * sizeof(VkExtensionProperties), alignof(VkExtensionProperties),
+               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!extensions)
+      return false;
+
+   res |= data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, NULL, &count, extensions);
+   bool has_ext = false;
+   if (res == VK_SUCCESS) {
+      for (unsigned i = 0; i < count; i++) {
+         if (strcmp(extensions[i].extensionName, VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0)
+            *has_khr = true;
+         if (strcmp(extensions[i].extensionName, VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0)
+            has_ext = true;
+      }
+   }
+
+   vk_free(&data->alloc, extensions);
+   return *has_khr || has_ext;
+}
+
+/* Initialize per-queue context:
+ *
+ * Creates one CommandPool, one QueryPool and one timeline Semaphore for the queue and records
+ * one CommandBuffer per timestamp query. On failure, everything created here and the objects
+ * of all previously initialized queue contexts (ctx->queues up to queue_ctx) are destroyed. */
+static VkResult
+init_queue_context(device_context *ctx, queue_context *queue_ctx)
+{
+#define CHECK_RESULT(res, label)                                                                   \
+   if (res != VK_SUCCESS) {                                                                        \
+      goto label;                                                                                  \
+   }
+
+   VkResult result;
+
+   /* Create command pool */
+   struct VkCommandPoolCreateInfo pool_info = {
+      .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
+      .flags = 0,
+      .queueFamilyIndex = queue_ctx->queue_family_idx,
+   };
+   result =
+      ctx->vtable.CreateCommandPool(ctx->device, &pool_info, &ctx->alloc, &queue_ctx->cmdPool);
+   CHECK_RESULT(result, fail_cmdpool)
+
+   /* Create query pool */
+   VkQueryPoolCreateInfo query_pool_info = {
+      .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO,
+      .queryType = VK_QUERY_TYPE_TIMESTAMP,
+      .queryCount = MAX_QUERIES,
+   };
+   result = ctx->vtable.CreateQueryPool(ctx->device, &query_pool_info, &ctx->alloc,
+                                        &queue_ctx->queryPool);
+   CHECK_RESULT(result, fail_querypool)
+   ctx->vtable.ResetQueryPool(ctx->device, queue_ctx->queryPool, 0, MAX_QUERIES);
+   ringbuffer_init(queue_ctx->queries);
+
+   /* Create timeline semaphore */
+   VkSemaphoreTypeCreateInfo timelineCreateInfo = {
+      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
+      .pNext = NULL,
+      .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
+      .initialValue = 0,
+   };
+   VkSemaphoreCreateInfo createInfo = {
+      .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
+      .pNext = &timelineCreateInfo,
+      .flags = 0,
+   };
+   result =
+      ctx->vtable.CreateSemaphore(ctx->device, &createInfo, &ctx->alloc, &queue_ctx->semaphore);
+   CHECK_RESULT(result, fail_semaphore);
+
+   for (unsigned j = 0; j < MAX_QUERIES; j++) {
+      struct query *query = &queue_ctx->queries.data[j];
+
+      /* Allocate commandBuffer for timestamp. */
+      VkCommandBufferAllocateInfo buffer_info = {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+         .commandPool = queue_ctx->cmdPool,
+         .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+         .commandBufferCount = 1,
+      };
+      result = ctx->vtable.AllocateCommandBuffers(ctx->device, &buffer_info, &query->cmdbuffer);
+      CHECK_RESULT(result, fail)
+      result = ctx->vtable.SetDeviceLoaderData(ctx->device, query->cmdbuffer);
+      CHECK_RESULT(result, fail)
+
+      /* Record commandbuffer. */
+      VkCommandBufferBeginInfo beginInfo = {
+         .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+      };
+
+      result = ctx->vtable.BeginCommandBuffer(query->cmdbuffer, &beginInfo);
+      CHECK_RESULT(result, fail)
+      ctx->vtable.CmdWriteTimestamp(query->cmdbuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                                    queue_ctx->queryPool, j);
+      result = ctx->vtable.EndCommandBuffer(query->cmdbuffer);
+      CHECK_RESULT(result, fail)
+   }
+
+#undef CHECK_RESULT
+   return result;
+
+fail:
+   ctx->vtable.DestroySemaphore(ctx->device, queue_ctx->semaphore, &ctx->alloc);
+fail_semaphore:
+   ctx->vtable.DestroyQueryPool(ctx->device, queue_ctx->queryPool, &ctx->alloc);
+fail_querypool:
+   ctx->vtable.DestroyCommandPool(ctx->device, queue_ctx->cmdPool, &ctx->alloc);
+fail_cmdpool:
+   for (queue_context *qctx = ctx->queues; qctx != queue_ctx; qctx++) {
+      ctx->vtable.DestroySemaphore(ctx->device, qctx->semaphore, &ctx->alloc);
+      ctx->vtable.DestroyQueryPool(ctx->device, qctx->queryPool, &ctx->alloc);
+      ctx->vtable.DestroyCommandPool(ctx->device, qctx->cmdPool, &ctx->alloc);
+   }
+
+   return result;
+}
+
+/* vkCreateDevice hook; the device is only instrumented when should_enable_layer() agrees. */
+static VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
+                      const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
+{
+   instance_data *instance_ctx = get_instance_data(physicalDevice);
+   VkLayerDeviceCreateInfo *chain_info = get_device_chain_info(pCreateInfo, VK_LAYER_LINK_INFO);
+   PFN_vkGetDeviceProcAddr fpGetDeviceProcAddr = chain_info->u.pLayerInfo->pfnNextGetDeviceProcAddr;
+   PFN_vkGetInstanceProcAddr fpGetInstanceProcAddr =
+      chain_info->u.pLayerInfo->pfnNextGetInstanceProcAddr;
+   PFN_vkCreateDevice fpCreateDevice =
+      (PFN_vkCreateDevice)fpGetInstanceProcAddr(instance_ctx->instance, "vkCreateDevice");
+   if (fpCreateDevice == NULL)
+      return VK_ERROR_INITIALIZATION_FAILED;
+
+   /* Advance the link info for the next element on the chain. */
+   chain_info->u.pLayerInfo = chain_info->u.pLayerInfo->pNext;
+
+   const VkAllocationCallbacks *alloc = pAllocator ? pAllocator : &instance_ctx->alloc;
+   device_data *data;
+   VkResult result;
+
+   /*  Only allocate a context and add to dispatch if the extension is enabled. */
+   const VkPhysicalDeviceAntiLagFeaturesAMD *ext_features =
+      vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD);
+   bool enable = ext_features && should_enable_layer(instance_ctx, physicalDevice, *ext_features);
+   if (enable) {
+      /* Count queues with sufficient timestamp valid bits. */
+      // TODO: make it work with less than 64 valid bits
+      unsigned num_queue_families = 0;
+      unsigned num_queues = 0;
+      for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
+         num_queue_families =
+            MAX2(num_queue_families, pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex + 1);
+      VkQueueFamilyProperties *queue_family_props =
+         vk_alloc(alloc, num_queue_families * sizeof(VkQueueFamilyProperties),
+                  alignof(VkQueueFamilyProperties), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+      if (!queue_family_props)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+
+      instance_ctx->vtable.GetPhysicalDeviceQueueFamilyProperties(
+         physicalDevice, &num_queue_families, queue_family_props);
+      for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+         uint32_t queue_family_idx = pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex;
+         if (queue_family_props[queue_family_idx].timestampValidBits == 64 &&
+             (queue_family_props[queue_family_idx].queueFlags &
+              (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT))) {
+            num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
+         }
+      }
+
+      /* Allocate the context. */
+      device_context *ctx;
+      queue_context *queues;
+      VK_MULTIALLOC(ma);
+      vk_multialloc_add(&ma, &data, device_data, 1);
+      vk_multialloc_add(&ma, &ctx, struct device_context, 1);
+      vk_multialloc_add(&ma, &queues, queue_context, num_queues);
+      void *buf = vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      if (!buf) {
+         vk_free(alloc, queue_family_props);
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+      }
+
+      VkPhysicalDeviceProperties properties;
+      instance_ctx->vtable.GetPhysicalDeviceProperties(physicalDevice, &properties);
+
+      /* Ensure that calibrated timestamps and host query reset extensions are enabled. */
+      bool has_calibrated_timestamps = false;
+      bool has_calibrated_timestamps_khr = false;
+      bool has_vk12 = instance_ctx->apiVersion >= VK_API_VERSION_1_2 &&
+                      properties.apiVersion >= VK_API_VERSION_1_2;
+      bool has_host_query_reset = has_vk12;
+      bool has_host_query_reset_ext = false;
+      bool has_timeline_semaphore = has_vk12;
+      bool has_timeline_semaphore_khr = false;
+      for (unsigned i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
+         if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
+                    VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0)
+            has_calibrated_timestamps = has_calibrated_timestamps_khr = true;
+         if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
+                    VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME) == 0)
+            has_calibrated_timestamps = true;
+         if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
+                    VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME) == 0)
+            has_host_query_reset = has_host_query_reset_ext = true;
+         if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
+                    VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME) == 0)
+            has_timeline_semaphore = has_timeline_semaphore_khr = true;
+      }
+
+      /* Add missing extensions. */
+      VkDeviceCreateInfo create_info = *pCreateInfo;
+      const char **ext_names = NULL;
+      uint32_t num_extra_extensions =
+         !has_calibrated_timestamps + !has_host_query_reset + !has_timeline_semaphore;
+      if (num_extra_extensions) {
+         ext_names = vk_alloc(
+            alloc, (pCreateInfo->enabledExtensionCount + num_extra_extensions) * sizeof(char *),
+            alignof(char *), VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+         if (!ext_names) {
+            result = VK_ERROR_OUT_OF_HOST_MEMORY;
+            goto fail;
+         }
+
+         memcpy(ext_names, pCreateInfo->ppEnabledExtensionNames,
+                sizeof(char *) * pCreateInfo->enabledExtensionCount);
+
+         if (!has_timeline_semaphore) {
+            has_timeline_semaphore_khr = true;
+            ext_names[create_info.enabledExtensionCount++] =
+               VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME;
+         }
+         if (!has_host_query_reset) {
+            has_host_query_reset_ext = true;
+            ext_names[create_info.enabledExtensionCount++] = VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME;
+         }
+         if (!has_calibrated_timestamps) {
+            check_calibrated_timestamps(instance_ctx, physicalDevice,
+                                        &has_calibrated_timestamps_khr);
+            ext_names[create_info.enabledExtensionCount++] =
+               has_calibrated_timestamps_khr ? VK_KHR_CALIBRATED_TIMESTAMPS_EXTENSION_NAME
+                                             : VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME;
+         }
+         create_info.ppEnabledExtensionNames = ext_names;
+      }
+
+      /* Ensure that hostQueryReset feature is enabled. */
+      const VkPhysicalDeviceVulkan12Features *vk12 =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
+      const VkPhysicalDeviceHostQueryResetFeatures *query_reset =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES);
+      const VkPhysicalDeviceTimelineSemaphoreFeatures *timeline_semaphore =
+         vk_find_struct_const(pCreateInfo->pNext, PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES);
+      uint32_t prev_hostQueryReset;
+      uint32_t prev_timelineSemaphore;
+      if (vk12) {
+         prev_hostQueryReset = vk12->hostQueryReset;
+         prev_timelineSemaphore = vk12->timelineSemaphore;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->hostQueryReset = VK_TRUE;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->timelineSemaphore = VK_TRUE;
+      } else {
+         if (query_reset) {
+            prev_hostQueryReset = query_reset->hostQueryReset;
+            ((VkPhysicalDeviceHostQueryResetFeatures *)query_reset)->hostQueryReset = VK_TRUE;
+         } else {
+            VkPhysicalDeviceHostQueryResetFeatures *feat =
+               alloca(sizeof(VkPhysicalDeviceHostQueryResetFeatures));
+            *feat = (VkPhysicalDeviceHostQueryResetFeatures){
+               .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
+               .pNext = (void *)create_info.pNext,
+               .hostQueryReset = VK_TRUE,
+            };
+            create_info.pNext = feat;
+         }
+         if (timeline_semaphore) {
+            prev_timelineSemaphore = timeline_semaphore->timelineSemaphore;
+            ((VkPhysicalDeviceTimelineSemaphoreFeatures *)timeline_semaphore)->timelineSemaphore =
+               VK_TRUE;
+         } else {
+            VkPhysicalDeviceTimelineSemaphoreFeatures *feat =
+               alloca(sizeof(VkPhysicalDeviceTimelineSemaphoreFeatures));
+            *feat = (VkPhysicalDeviceTimelineSemaphoreFeatures){
+               .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
+               .pNext = (void *)create_info.pNext,
+               .timelineSemaphore = VK_TRUE,
+            };
+            create_info.pNext = feat;
+         }
+      }
+
+      /* Create Device. */
+      result = fpCreateDevice(physicalDevice, &create_info, pAllocator, pDevice);
+      /* Undo the temporary edits made to the application's own feature structs. */
+      if (vk12) {
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->hostQueryReset = prev_hostQueryReset;
+         ((VkPhysicalDeviceVulkan12Features *)vk12)->timelineSemaphore = prev_timelineSemaphore;
+      } else {
+         if (query_reset)
+            ((VkPhysicalDeviceHostQueryResetFeatures *)query_reset)->hostQueryReset =
+               prev_hostQueryReset;
+         if (timeline_semaphore)
+            ((VkPhysicalDeviceTimelineSemaphoreFeatures *)timeline_semaphore)->timelineSemaphore =
+               prev_timelineSemaphore;
+      }
+      if (ext_names)
+         vk_free(alloc, ext_names);
+      if (result != VK_SUCCESS)
+         goto fail;
+
+      /* Initialize Context. */
+      data->ctx = ctx;
+      ctx->device = *pDevice;
+      chain_info = get_device_chain_info(pCreateInfo, VK_LOADER_DATA_CALLBACK);
+      PFN_vkSetDeviceLoaderData fpSetDeviceLoaderData =
+         (PFN_vkSetDeviceLoaderData)chain_info->u.pfnSetDeviceLoaderData;
+      init_device_vtable(ctx, fpGetDeviceProcAddr, fpSetDeviceLoaderData,
+                         has_calibrated_timestamps_khr, has_host_query_reset_ext,
+                         has_timeline_semaphore_khr);
+      simple_mtx_init(&ctx->mtx, mtx_plain);
+      ctx->num_queues = num_queues;
+      ctx->alloc = *alloc;
+      ctx->calibration.timestamp_period = properties.limits.timestampPeriod;
+      ringbuffer_init(ctx->frames);
+
+      /* Initialize Queue contexts. */
+      unsigned idx = 0;
+      for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
+         /* Skip queue families without sufficient timestamp valid bits.
+          * Also skip queue families which cannot do GRAPHICS or COMPUTE since they
+          * are always heavily async in nature (DMA transfers and sparse for example).
+          * Video is also irrelevant here since it should never be a critical path
+          * in a game that wants anti-lag. */
+         uint32_t queue_family_idx = pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex;
+         if (queue_family_props[queue_family_idx].timestampValidBits != 64 ||
+             !(queue_family_props[queue_family_idx].queueFlags &
+               (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT)))
+            continue;
+
+         for (unsigned j = 0; j < pCreateInfo->pQueueCreateInfos[i].queueCount; j++) {
+            ctx->queues[idx].queue_family_idx = queue_family_idx;
+            ctx->vtable.GetDeviceQueue(*pDevice, queue_family_idx, j, &ctx->queues[idx].queue);
+            result = init_queue_context(ctx, &ctx->queues[idx]);
+            idx++;
+            if (result != VK_SUCCESS) {
+               ctx->vtable.DestroyDevice(*pDevice, pAllocator); /* don't leak the device */
+               goto fail;
+            }
+         }
+      }
+      assert(idx == num_queues);
+   fail:
+      vk_free(alloc, queue_family_props);
+   } else {
+      data = (device_data *)vk_alloc(alloc, sizeof(device_data), alignof(device_data),
+                                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+      if (!data)
+         return VK_ERROR_OUT_OF_HOST_MEMORY;
+      result = fpCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice);
+      data->ctx = NULL;
+   }
+
+   if (result == VK_SUCCESS) {
+      data->device = *pDevice;
+      data->GetDeviceProcAddr = fpGetDeviceProcAddr;
+      data->next = NULL;
+      add_device(data);
+   } else {
+      vk_free(alloc, data);
+   }
+
+   return result;
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+anti_lag_DestroyDevice(VkDevice pDevice, const VkAllocationCallbacks *pAllocator)
+{
+   device_data *data = remove_device(pDevice);
+   assert(data && data->ctx);
+   device_context *ctx = data->ctx;
+
+   /* Tear down the objects held by every queue context. It is up to the
+    * application to guarantee that the device has no work in flight.
+    */
+   for (queue_context *q = ctx->queues; q != ctx->queues + ctx->num_queues; q++) {
+      ctx->vtable.DestroyQueryPool(ctx->device, q->queryPool, &ctx->alloc);
+      ctx->vtable.DestroyCommandPool(ctx->device, q->cmdPool, &ctx->alloc);
+      ctx->vtable.DestroySemaphore(ctx->device, q->semaphore, &ctx->alloc);
+   }
+
+   ctx->vtable.DestroyDevice(pDevice, pAllocator);
+   /* Only `data` is freed here; ctx was set up by the same multialloc in CreateDevice. */
+   vk_free(&ctx->alloc, data);
+}
+
+static bool
+is_anti_lag_supported(VkPhysicalDevice physicalDevice)
+{
+   instance_data *data = get_instance_data(physicalDevice);
+   VkPhysicalDeviceProperties props;
+   data->vtable.GetPhysicalDeviceProperties(physicalDevice, &props);
+   if (!props.limits.timestampComputeAndGraphics || props.limits.timestampPeriod == 0.0)
+      return false;
+
+   /* Either flavour of calibrated timestamps is acceptable; note which one is present. */
+   bool use_khr;
+   if (!check_calibrated_timestamps(data, physicalDevice, &use_khr))
+      return false;
+
+   /* Timeline semaphores and host query reset are both mandatory. */
+   VkPhysicalDeviceTimelineSemaphoreFeatures timeline = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES,
+      .timelineSemaphore = VK_FALSE,
+   };
+   VkPhysicalDeviceHostQueryResetFeatures host_reset = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES,
+      .pNext = &timeline,
+      .hostQueryReset = VK_FALSE,
+   };
+   VkPhysicalDeviceFeatures2 features = {
+      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+      .pNext = &host_reset,
+   };
+   PFN_vkGetPhysicalDeviceFeatures2 query = data->vtable.GetPhysicalDeviceFeatures2KHR
+                                               ? data->vtable.GetPhysicalDeviceFeatures2KHR
+                                               : data->vtable.GetPhysicalDeviceFeatures2;
+   if (query)
+      query(physicalDevice, &features);
+   if (!(timeline.timelineSemaphore && host_reset.hostQueryReset))
+      return false;
+
+   /* Both the DEVICE and the CLOCK_MONOTONIC time domain have to be calibrateable. */
+   PFN_vkGetPhysicalDeviceCalibrateableTimeDomainsKHR ctd =
+      use_khr ? data->vtable.GetPhysicalDeviceCalibrateableTimeDomainsKHR
+              : data->vtable.GetPhysicalDeviceCalibrateableTimeDomainsEXT;
+   uint32_t count = 0;
+   VkResult res = ctd(physicalDevice, &count, NULL);
+   VkTimeDomainKHR *time_domains = alloca(count * sizeof(VkTimeDomainKHR));
+   res |= ctd(physicalDevice, &count, time_domains);
+   if (res != VK_SUCCESS)
+      return false;
+
+   bool device_domain = false, host_domain = false;
+   for (uint32_t i = 0; i < count; i++) {
+      if (time_domains[i] == VK_TIME_DOMAIN_DEVICE_KHR)
+         device_domain = true;
+      else if (time_domains[i] == VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR)
+         host_domain = true;
+   }
+   return device_domain && host_domain;
+}
+
+static VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_EnumerateDeviceExtensionProperties(VkPhysicalDevice physicalDevice, const char *pLayerName,
+                                            uint32_t *pPropertyCount,
+                                            VkExtensionProperties *pProperties)
+{
+   instance_data *instance_data = get_instance_data(physicalDevice);
+
+   if (pLayerName && strcmp(pLayerName, "VK_LAYER_MESA_anti_lag") == 0) {
+      if (!is_anti_lag_supported(physicalDevice)) {
+         *pPropertyCount = 0;
+         return VK_SUCCESS;
+      }
+
+      VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties, pPropertyCount);
+      vk_outarray_append_typed(VkExtensionProperties, &out, prop)
+      {
+         *prop =
+            (VkExtensionProperties){VK_AMD_ANTI_LAG_EXTENSION_NAME, VK_AMD_ANTI_LAG_SPEC_VERSION};
+      }
+      return vk_outarray_status(&out);
+   }
+
+   return instance_data->vtable.EnumerateDeviceExtensionProperties(physicalDevice, pLayerName,
+                                                                   pPropertyCount, pProperties);
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+anti_lag_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
+                                    VkPhysicalDeviceFeatures2 *pFeatures)
+{
+   instance_data *ctx = get_instance_data(physicalDevice);
+   ctx->vtable.GetPhysicalDeviceFeatures2(physicalDevice, pFeatures);
+   VkPhysicalDeviceAntiLagFeaturesAMD *anti_lag_features =
+      vk_find_struct(pFeatures->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD);
+
+   if (anti_lag_features) {
+      anti_lag_features->antiLag |= is_anti_lag_supported(physicalDevice);
+   }
+}
+
+static VKAPI_ATTR void VKAPI_CALL
+anti_lag_GetPhysicalDeviceFeatures2KHR(VkPhysicalDevice physicalDevice,
+                                       VkPhysicalDeviceFeatures2 *pFeatures)
+{
+   instance_data *ctx = get_instance_data(physicalDevice);
+   ctx->vtable.GetPhysicalDeviceFeatures2KHR(physicalDevice, pFeatures);
+   VkPhysicalDeviceAntiLagFeaturesAMD *anti_lag_features =
+      vk_find_struct(pFeatures->pNext, PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD);
+
+   if (anti_lag_features) {
+      anti_lag_features->antiLag |= is_anti_lag_supported(physicalDevice);
+   }
+}
+
+static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+anti_lag_GetInstanceProcAddr(VkInstance instance, const char *pName);
+
+static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+anti_lag_GetDeviceProcAddr(VkDevice device, const char *pName);
+
+#define ADD_HOOK(fn) {"vk" #fn, (PFN_vkVoidFunction)anti_lag_##fn}
+static const struct {
+   const char *name;
+   PFN_vkVoidFunction ptr;
+} instance_funcptr_map[] = {
+   ADD_HOOK(GetInstanceProcAddr),
+   ADD_HOOK(CreateInstance),
+   ADD_HOOK(DestroyInstance),
+   ADD_HOOK(EnumerateDeviceExtensionProperties),
+   ADD_HOOK(CreateDevice),
+   ADD_HOOK(GetPhysicalDeviceFeatures2),
+   ADD_HOOK(GetPhysicalDeviceFeatures2KHR),
+};
+
+static const struct {
+   const char *name;
+   PFN_vkVoidFunction ptr;
+} device_funcptr_map[] = {
+   ADD_HOOK(GetDeviceProcAddr),
+   ADD_HOOK(DestroyDevice),
+   ADD_HOOK(AntiLagUpdateAMD),
+   ADD_HOOK(QueueSubmit),
+   ADD_HOOK(QueueSubmit2),
+   ADD_HOOK(QueueSubmit2KHR),
+};
+#undef ADD_HOOK
+
+static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+anti_lag_GetInstanceProcAddr(VkInstance instance, const char *pName)
+{
+   if (!pName)
+      return NULL;
+
+   PFN_vkVoidFunction result = NULL;
+   if (instance) {
+      instance_data *ctx = get_instance_data(instance);
+      if (ctx)
+         result = ctx->vtable.GetInstanceProcAddr(instance, pName);
+   }
+
+   /* Only hook instance functions which are exposed by the underlying impl.
+    * Ignore instance parameter for vkCreateInstance and vkCreateDevice.
+    */
+   if (result || strcmp(pName, "vkCreateInstance") == 0 || strcmp(pName, "vkCreateDevice") == 0) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(instance_funcptr_map); i++) {
+         if (strcmp(pName, instance_funcptr_map[i].name) == 0)
+            return instance_funcptr_map[i].ptr;
+      }
+   }
+
+   return result;
+}
+
+static VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+anti_lag_GetDeviceProcAddr(VkDevice device, const char *pName)
+{
+   if (!pName || !device)
+      return NULL;
+
+   device_data *data = get_device_data(device);
+   PFN_vkVoidFunction result = data->GetDeviceProcAddr(device, pName);
+
+   /* Only hook device functions if the Layer extension is enabled. */
+   if (data->ctx && (result || strcmp(pName, "vkAntiLagUpdateAMD") == 0)) {
+      for (uint32_t i = 0; i < ARRAY_SIZE(device_funcptr_map); i++) {
+         if (strcmp(pName, device_funcptr_map[i].name) == 0)
+            return device_funcptr_map[i].ptr;
+      }
+   }
+
+   return result;
+}
+
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct)
+{
+   assert(pVersionStruct != NULL);
+   assert(pVersionStruct->sType == LAYER_NEGOTIATE_INTERFACE_STRUCT);
+
+   if (pVersionStruct->loaderLayerInterfaceVersion >= 2) {
+      pVersionStruct->loaderLayerInterfaceVersion = 2;
+      pVersionStruct->pfnGetInstanceProcAddr = anti_lag_GetInstanceProcAddr;
+      pVersionStruct->pfnGetDeviceProcAddr = anti_lag_GetDeviceProcAddr;
+      pVersionStruct->pfnGetPhysicalDeviceProcAddr = NULL;
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/vulkan/anti-lag-layer/meson.build b/src/vulkan/anti-lag-layer/meson.build
new file mode 100644
index 00000000000..264c55c8e75
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/meson.build
@@ -0,0 +1,26 @@
+# Copyright © 2025 Valve Corporation
+# SPDX-License-Identifier: MIT
+
+vklayer_files = files(
+  'anti_lag_layer.c',
+  'anti_lag_layer_interface.c',
+)
+
+shared_library(
+  'VkLayer_MESA_anti_lag',
+  vklayer_files,
+  c_args : [no_override_init_args],
+  gnu_symbol_visibility : 'hidden',
+  dependencies : [
+    idep_vulkan_util, idep_mesautil,
+  ],
+  include_directories : [inc_include, inc_util, inc_src],
+  link_args : cc.get_supported_link_arguments(['-Wl,-Bsymbolic-functions', '-Wl,-z,relro']),
+  install : true
+)
+
+install_data(
+  files('VkLayer_MESA_anti_lag.json'),
+  install_dir : join_paths(get_option('datadir'), 'vulkan', 'implicit_layer.d'),
+  install_tag : 'runtime',
+)
diff --git a/src/vulkan/anti-lag-layer/ringbuffer.h b/src/vulkan/anti-lag-layer/ringbuffer.h
new file mode 100644
index 00000000000..1747b7e720f
--- /dev/null
+++ b/src/vulkan/anti-lag-layer/ringbuffer.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2025 Valve Corporation
+ *
+ * SPDX-License-Identifier: MIT
+ */
+
+#ifndef RINGBUFFER_H
+#define RINGBUFFER_H
+
+#include "util/macros.h"
+
+#define RINGBUFFER_DECLARE(name, type, N)                                                          \
+   struct {                                                                                        \
+      type data[N];                                                                                \
+      uint32_t head;                                                                               \
+      uint32_t tail;                                                                               \
+      uint32_t size;                                                                               \
+      simple_mtx_t mtx;                                                                            \
+   } name
+
+#define ringbuffer_init(buffer)                                                                    \
+   (buffer.head = buffer.tail = buffer.size = 0, simple_mtx_init(&buffer.mtx, mtx_plain))
+
+#define ringbuffer_lock(buffer)   simple_mtx_lock(&buffer.mtx)
+#define ringbuffer_unlock(buffer) simple_mtx_unlock(&buffer.mtx)
+
+static inline uint32_t
+__ringbuffer_add_wrap(uint32_t *val, uint32_t *size, uint32_t N)
+{
+   uint32_t prev = *val;
+   *val = (*val + 1) % N;
+   *size = *size + 1;
+   assert(*size <= N);
+   return prev;
+}
+
+#define ringbuffer_alloc(buffer)                                                                   \
+   (buffer.size == ARRAY_SIZE(buffer.data)                                                         \
+       ? NULL                                                                                      \
+       : &buffer.data[__ringbuffer_add_wrap(&buffer.head, &buffer.size, ARRAY_SIZE(buffer.data))])
+
+#define ringbuffer_free(buffer, elem) /* one expression: safe in an unbraced if/else */            \
+   (assert(elem == NULL || elem == &buffer.data[buffer.tail]),                                     \
+    buffer.size--,                                                                                 \
+    assert(buffer.size < ARRAY_SIZE(buffer.data)),                                                 \
+    buffer.tail = (buffer.tail + 1) % ARRAY_SIZE(buffer.data))
+
+#define ringbuffer_first(buffer) (&buffer.data[buffer.tail])
+
+#define ringbuffer_last(buffer)                                                                    \
+   (&buffer.data[(buffer.head + ARRAY_SIZE(buffer.data) - 1) % ARRAY_SIZE(buffer.data)])
+
+#define ringbuffer_index(buffer, elem) (elem - buffer.data)
+
+#define ringbuffer_next(buffer, elem)                                                              \
+   (&buffer.data[(ringbuffer_index(buffer, elem) + 1) % ARRAY_SIZE(buffer.data)])
+
+#endif /* RINGBUFFER_H */
diff --git a/src/vulkan/meson.build b/src/vulkan/meson.build
index 3225b5f4a9d..cf62ecc6ae7 100644
--- a/src/vulkan/meson.build
+++ b/src/vulkan/meson.build
@@ -98,3 +98,6 @@ endif
 if with_vulkan_vram_report_limit_layer
   subdir('vram-report-limit-layer')
 endif
+if with_vulkan_anti_lag_layer
+  subdir('anti-lag-layer')
+endif
--
2.50.1


From e4adbbe12d9aafdaf80f340f685cf7bd7758d385 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20Sch=C3=BCrmann?= <daniel@schuermann.dev>
Date: Thu, 30 May 2024 11:55:46 +0200
Subject: [PATCH 07/11] util/time: add os_time_nanosleep_until() function

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34242>
---
 src/util/os_time.c | 16 +++++++++++++++-
 src/util/os_time.h |  2 ++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/util/os_time.c b/src/util/os_time.c
index da8ad7a80b8..209b7ae442c 100644
--- a/src/util/os_time.c
+++ b/src/util/os_time.c
@@ -60,7 +60,21 @@ os_time_get_nano(void)
    return ts.tv_nsec + ts.tv_sec*INT64_C(1000000000);
 }

-
+/* Sleep until the absolute point in time `deadline`, which is given in nanoseconds. */
+void
+os_time_nanosleep_until(int64_t deadline)
+{
+#if DETECT_OS_LINUX || DETECT_OS_MANAGARM
+   const int64_t ns_per_sec = INT64_C(1000000000);
+   struct timespec ts = {.tv_sec = deadline / ns_per_sec, .tv_nsec = deadline % ns_per_sec};
+   while (clock_nanosleep(CLOCK_MONOTONIC, TIMER_ABSTIME, &ts, &ts) == EINTR)
+      ; /* absolute CLOCK_MONOTONIC sleep, retried for as long as it reports EINTR */
+#else
+   const int64_t remaining = deadline - os_time_get_nano();
+   if (remaining > 0)
+      os_time_sleep(remaining / 1000); /* os_time_sleep() takes microseconds */
+#endif
+}

 void
 os_time_sleep(int64_t usecs)
diff --git a/src/util/os_time.h b/src/util/os_time.h
index 6ca37eac769..4217ff37b68 100644
--- a/src/util/os_time.h
+++ b/src/util/os_time.h
@@ -74,6 +74,8 @@ os_localtime(const time_t *timer, struct tm *buf)
 #endif
 }

+void
+os_time_nanosleep_until(int64_t deadline);

 /*
  * Sleep.
--
2.50.1


From 22d1adddbaff70c62207396a12576329f477174f Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Thu, 26 Jun 2025 13:00:20 +0200
Subject: [PATCH 08/11] anti-lag: Only consider timestamps from queues which
 have presented.

Prevents stray submissions to compute queues from nullifying the delay.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34242>
---
 src/vulkan/anti-lag-layer/anti_lag_layer.c    | 24 ++++++++++++++++++-
 src/vulkan/anti-lag-layer/anti_lag_layer.h    |  3 +++
 .../anti-lag-layer/anti_lag_layer_interface.c |  2 ++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.c b/src/vulkan/anti-lag-layer/anti_lag_layer.c
index 6c21e074024..d7543a5dfd9 100644
--- a/src/vulkan/anti-lag-layer/anti_lag_layer.c
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.c
@@ -8,6 +8,7 @@
 #include <string.h>
 #include "util/os_time.h"
 #include "util/simple_mtx.h"
+#include "util/u_atomic.h"
 #include "vulkan/vulkan_core.h"
 #include "ringbuffer.h"
 #include "vk_alloc.h"
@@ -400,7 +401,11 @@ get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer
    /* Begin critical section. */
    ringbuffer_lock(ctx->frames);
    ringbuffer_lock(queue_ctx->queries);
-   struct query *query = allocate_query(ctx, queue_ctx);
+
+   /* Don't record timestamps for queues that are not deemed sensitive to latency. */
+   struct query *query =
+      p_atomic_read(&queue_ctx->latency_sensitive) ? allocate_query(ctx, queue_ctx) : NULL;
+
    if (query == NULL) {
       ringbuffer_unlock(queue_ctx->queries);
       ringbuffer_unlock(ctx->frames);
@@ -588,3 +593,20 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS
    vk_free(&ctx->alloc, buf);
    return res;
 }
+
+VKAPI_ATTR VkResult VKAPI_CALL
+anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo)
+{
+   /* Mark the presenting queue as latency-sensitive, then forward the present call.
+    * With multiple queues in flight a global min-delay is misleading: an async compute
+    * queue can see very low delay while the main graphics queue is swamped with work,
+    * which would disable sleeps although graphics need not depend on that compute work.
+    * The submit paths treat a NULL queue context as "not tracked"; do the same here
+    * instead of dereferencing it. */
+   device_context *ctx = get_device_context(queue);
+   queue_context *queue_ctx = get_queue_context(ctx, queue);
+   if (queue_ctx)
+      p_atomic_set(&queue_ctx->latency_sensitive, true);
+
+   return ctx->vtable.QueuePresentKHR(queue, pPresentInfo);
+}
diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.h b/src/vulkan/anti-lag-layer/anti_lag_layer.h
index 31abb0f9aee..d03d246d79c 100644
--- a/src/vulkan/anti-lag-layer/anti_lag_layer.h
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.h
@@ -39,6 +39,7 @@ struct query {
 typedef struct queue_context {
    VkQueue queue;
    uint32_t queue_family_idx;
+   bool latency_sensitive;
    VkCommandPool cmdPool;
    VkQueryPool queryPool;
    VkSemaphore semaphore;
@@ -74,6 +75,7 @@ typedef struct device_context {
       DECLARE_HOOK(DestroySemaphore);
       DECLARE_HOOK(GetSemaphoreCounterValue);
       DECLARE_HOOK(WaitSemaphores);
+      DECLARE_HOOK(QueuePresentKHR);
 #undef DECLARE_HOOK
    } vtable;

@@ -105,6 +107,7 @@ VkResult anti_lag_QueueSubmit2(VkQueue queue, uint32_t submitCount, const VkSubm
                                VkFence fence);
 VkResult anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pSubmits,
                               VkFence fence);
+VkResult anti_lag_QueuePresentKHR(VkQueue queue, const VkPresentInfoKHR *pPresentInfo);

 VkResult anti_lag_NegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface *pVersionStruct);

diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c
index d2ca4a7dd44..6a803e24fe6 100644
--- a/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer_interface.c
@@ -194,6 +194,7 @@ init_device_vtable(device_context *ctx, PFN_vkGetDeviceProcAddr gpa, PFN_vkSetDe
    INIT_HOOK(CmdWriteTimestamp);
    INIT_HOOK(CreateSemaphore);
    INIT_HOOK(DestroySemaphore);
+   INIT_HOOK(QueuePresentKHR);
    INIT_HOOK_ALIAS(GetSemaphoreCounterValue, GetSemaphoreCounterValueKHR, timeline_semaphore_khr);
    INIT_HOOK_ALIAS(WaitSemaphores, WaitSemaphoresKHR, timeline_semaphore_khr);
 #undef INIT_HOOK
@@ -833,6 +834,7 @@ static const struct {
    ADD_HOOK(QueueSubmit),
    ADD_HOOK(QueueSubmit2),
    ADD_HOOK(QueueSubmit2KHR),
+   ADD_HOOK(QueuePresentKHR),
 };
 #undef ADD_HOOK

--
2.50.1


From be19fb7abf7dba7aaff2ff809a6a0a8f6ac68ce4 Mon Sep 17 00:00:00 2001
From: Hans-Kristian Arntzen <post@arntzen-software.no>
Date: Thu, 26 Jun 2025 14:22:07 +0200
Subject: [PATCH 09/11] anti-lag: Submit timestamps early in a frame.

Allows detecting if the queue ends up going idle due to
a cross-queue dependency. Since we're only considering delays from
specific queues, we would not be able to detect low-latency situations
arising from the start of a frame happening on async queues.

Until we observe real work happening for a queue in a frame context,
submit timestamps ahead of any other waits.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34242>
---
 src/vulkan/anti-lag-layer/anti_lag_layer.c | 114 ++++++++++++++++-----
 1 file changed, 86 insertions(+), 28 deletions(-)

diff --git a/src/vulkan/anti-lag-layer/anti_lag_layer.c b/src/vulkan/anti-lag-layer/anti_lag_layer.c
index d7543a5dfd9..f730ca00f9c 100644
--- a/src/vulkan/anti-lag-layer/anti_lag_layer.c
+++ b/src/vulkan/anti-lag-layer/anti_lag_layer.c
@@ -366,13 +366,9 @@ get_queue_context(device_context *ctx, VkQueue queue)
 }

 static struct query *
-allocate_query(device_context *ctx, queue_context *queue_ctx)
+allocate_query(queue_context *queue_ctx, uint32_t frame_idx)
 {
-   if (!ctx->active_frame)
-      return NULL;
-
    /* Allow for a single frame to use at most half of the query pool. */
-   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
    if (queue_ctx->submissions_per_frame[frame_idx] > MAX_QUERIES / 2)
       return NULL;

@@ -394,7 +390,8 @@ allocate_query(device_context *ctx, queue_context *queue_ctx)
 }

 static bool
-get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer *cmdbuffer)
+get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer *cmdbuffer,
+                  bool has_command_buffer, bool has_wait_before_cmdbuffer, bool *early_submit)
 {
    uint64_t now = os_time_get_nano();

@@ -403,8 +400,24 @@ get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer
    ringbuffer_lock(queue_ctx->queries);

    /* Don't record timestamps for queues that are not deemed sensitive to latency. */
-   struct query *query =
-      p_atomic_read(&queue_ctx->latency_sensitive) ? allocate_query(ctx, queue_ctx) : NULL;
+   bool need_query = ctx->active_frame && p_atomic_read(&queue_ctx->latency_sensitive);
+   uint32_t frame_idx;
+   struct query *query = NULL;
+
+   if (need_query) {
+      assert(ctx->active_frame->state == FRAME_SUBMIT);
+      frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
+
+      /* For the very first submissions in a frame (until we observe real GPU work happening),
+       * we would want to submit a timestamp before anything else, including waits.
+       * This allows us to detect a sensitive queue going idle before we can submit work to it.
+       * If the queue in question depends on semaphores from other unrelated queues,
+       * we may not easily be able to detect that situation without adding a lot more complexity.
+       */
+      *early_submit = has_wait_before_cmdbuffer && queue_ctx->submissions_per_frame[frame_idx] == 0;
+      if (has_command_buffer || *early_submit)
+         query = allocate_query(queue_ctx, frame_idx);
+   }

    if (query == NULL) {
       ringbuffer_unlock(queue_ctx->queries);
@@ -421,8 +434,6 @@ get_commandbuffer(device_context *ctx, queue_context *queue_ctx, VkCommandBuffer
    queue_ctx->semaphore_value++;

    /* Add new submission entry for the current frame */
-   assert(ctx->active_frame->state == FRAME_SUBMIT);
-   uint32_t frame_idx = ringbuffer_index(ctx->frames, ctx->active_frame);
    queue_ctx->submissions_per_frame[frame_idx]++;

    ringbuffer_unlock(queue_ctx->queries);
@@ -435,13 +446,17 @@ queue_submit2(device_context *ctx, VkQueue queue, uint32_t submitCount,
               const VkSubmitInfo2 *pSubmits, VkFence fence, PFN_vkQueueSubmit2 queueSubmit2)
 {
    queue_context *queue_ctx = get_queue_context(ctx, queue);
-   if (!ctx->active_frame || !queue_ctx)
+   if (!ctx->active_frame || !queue_ctx || !submitCount)
       return queueSubmit2(queue, submitCount, pSubmits, fence);

+   bool has_wait_before_cmdbuffer = false;
    int first = -1;
    VkCommandBuffer timestamp_cmdbuffer;
    /* Check if any submission contains commandbuffers. */
    for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].waitSemaphoreInfoCount != 0)
+         has_wait_before_cmdbuffer = true;
+
       if (pSubmits[i].commandBufferInfoCount) {
          first = i;
          break;
@@ -449,23 +464,42 @@ queue_submit2(device_context *ctx, VkQueue queue, uint32_t submitCount,
    }

    /* Get timestamp commandbuffer. */
-   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+   bool early_submit;
+   if (!get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer, first >= 0,
+                          has_wait_before_cmdbuffer, &early_submit)) {
       return queueSubmit2(queue, submitCount, pSubmits, fence);
+   }

    VkSubmitInfo2 *submits;
    VkCommandBufferSubmitInfo *cmdbuffers;
    VkSemaphoreSubmitInfo *semaphores;
    VK_MULTIALLOC(ma);
-   vk_multialloc_add(&ma, &submits, VkSubmitInfo2, submitCount);
-   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBufferSubmitInfo,
-                     pSubmits[first].commandBufferInfoCount + 1);
-   vk_multialloc_add(&ma, &semaphores, VkSemaphoreSubmitInfo,
-                     pSubmits[first].signalSemaphoreInfoCount + 1);
+
+   if (early_submit) {
+      vk_multialloc_add(&ma, &submits, VkSubmitInfo2, submitCount + 1);
+      vk_multialloc_add(&ma, &cmdbuffers, VkCommandBufferSubmitInfo, 1);
+      vk_multialloc_add(&ma, &semaphores, VkSemaphoreSubmitInfo, 1);
+      first = 0;
+   } else {
+      vk_multialloc_add(&ma, &submits, VkSubmitInfo2, submitCount);
+      vk_multialloc_add(&ma, &cmdbuffers, VkCommandBufferSubmitInfo,
+                        pSubmits[first].commandBufferInfoCount + 1);
+      vk_multialloc_add(&ma, &semaphores, VkSemaphoreSubmitInfo,
+                        pSubmits[first].signalSemaphoreInfoCount + 1);
+   }
+
    void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
    if (!buf)
       return VK_ERROR_OUT_OF_HOST_MEMORY;

-   memcpy(submits, pSubmits, sizeof(VkSubmitInfo2) * submitCount);
+   if (early_submit) {
+      memcpy(submits + 1, pSubmits, sizeof(VkSubmitInfo2) * submitCount);
+      submits[0] = (VkSubmitInfo2){.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2};
+      submitCount++;
+   } else {
+      memcpy(submits, pSubmits, sizeof(VkSubmitInfo2) * submitCount);
+   }
+
    VkSubmitInfo2 *submit_info = &submits[first];

    /* Add commandbuffer to submission. */
@@ -518,13 +552,17 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS
 {
    device_context *ctx = get_device_context(queue);
    queue_context *queue_ctx = get_queue_context(ctx, queue);
-   if (!ctx->active_frame || !queue_ctx)
+   if (!ctx->active_frame || !queue_ctx || !submitCount)
       return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);

+   bool has_wait_before_cmdbuffer = false;
    int first = -1;
    VkCommandBuffer timestamp_cmdbuffer;
-   /* Check if any submission contains commandbuffers. */
+   /* Check if any submission contains commandbuffers or waits before those. */
    for (unsigned i = 0; i < submitCount; i++) {
+      if (pSubmits[i].waitSemaphoreCount != 0)
+         has_wait_before_cmdbuffer = true;
+
       if (pSubmits[i].commandBufferCount) {
          first = i;
          break;
@@ -532,8 +570,11 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS
    }

    /* Get timestamp commandbuffer. */
-   if (first == -1 || !get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer))
+   bool early_submit;
+   if (!get_commandbuffer(ctx, queue_ctx, &timestamp_cmdbuffer, first >= 0,
+                          has_wait_before_cmdbuffer, &early_submit)) {
       return ctx->vtable.QueueSubmit(queue, submitCount, pSubmits, fence);
+   }

    VkSubmitInfo *submits;
    VkCommandBuffer *cmdbuffers;
@@ -541,16 +582,33 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS
    VkTimelineSemaphoreSubmitInfo *semaphore_info;
    uint64_t *semaphore_values;
    VK_MULTIALLOC(ma);
-   vk_multialloc_add(&ma, &submits, VkSubmitInfo, submitCount);
-   vk_multialloc_add(&ma, &cmdbuffers, VkCommandBuffer, pSubmits[first].commandBufferCount + 1);
-   vk_multialloc_add(&ma, &semaphores, VkSemaphore, pSubmits[first].signalSemaphoreCount + 1);
-   vk_multialloc_add(&ma, &semaphore_info, VkTimelineSemaphoreSubmitInfo, 1);
-   vk_multialloc_add(&ma, &semaphore_values, uint64_t, pSubmits[first].signalSemaphoreCount + 1);
+
+   if (early_submit) {
+      vk_multialloc_add(&ma, &submits, VkSubmitInfo, submitCount + 1);
+      vk_multialloc_add(&ma, &cmdbuffers, VkCommandBuffer, 1);
+      vk_multialloc_add(&ma, &semaphores, VkSemaphore, 1);
+      vk_multialloc_add(&ma, &semaphore_info, VkTimelineSemaphoreSubmitInfo, 1);
+      vk_multialloc_add(&ma, &semaphore_values, uint64_t, 1);
+      first = 0;
+   } else {
+      vk_multialloc_add(&ma, &submits, VkSubmitInfo, submitCount);
+      vk_multialloc_add(&ma, &cmdbuffers, VkCommandBuffer, pSubmits[first].commandBufferCount + 1);
+      vk_multialloc_add(&ma, &semaphores, VkSemaphore, pSubmits[first].signalSemaphoreCount + 1);
+      vk_multialloc_add(&ma, &semaphore_info, VkTimelineSemaphoreSubmitInfo, 1);
+      vk_multialloc_add(&ma, &semaphore_values, uint64_t, pSubmits[first].signalSemaphoreCount + 1);
+   }
    void *buf = vk_multialloc_zalloc(&ma, &ctx->alloc, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
    if (!buf)
       return VK_ERROR_OUT_OF_HOST_MEMORY;

-   memcpy(submits, pSubmits, sizeof(VkSubmitInfo) * submitCount);
+   if (early_submit) {
+      memcpy(submits + 1, pSubmits, sizeof(VkSubmitInfo) * submitCount);
+      submits[0] = (VkSubmitInfo){.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO};
+      submitCount++;
+   } else {
+      memcpy(submits, pSubmits, sizeof(VkSubmitInfo) * submitCount);
+   }
+
    VkSubmitInfo *submit_info = &submits[first];

    /* Add commandbuffer to submission. */
@@ -562,7 +620,7 @@ anti_lag_QueueSubmit(VkQueue queue, uint32_t submitCount, const VkSubmitInfo *pS

    /* Add timeline semaphore to submission. */
    const VkTimelineSemaphoreSubmitInfo *tlssi =
-      vk_find_struct_const(pSubmits[first].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
+      vk_find_struct_const(submit_info->pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
    semaphores[0] = queue_ctx->semaphore;
    memcpy(&semaphores[1], submit_info->pSignalSemaphores,
           sizeof(VkSemaphore) * submit_info->signalSemaphoreCount);
--
2.50.1


From aaaa9d5cd9891b88b8a94692f0f49036233da227 Mon Sep 17 00:00:00 2001
From: Kyle Gospodnetich <me@kylegospodneti.ch>
Date: Sun, 18 May 2025 09:40:01 -0700
Subject: [PATCH 10/11] [BEGIN] Proton-GE Patches

--
2.50.1


From c4bb61d428cc14bc21f9a10f530fd37aa32a4c24 Mon Sep 17 00:00:00 2001
From: Kyle Gospodnetich <me@kylegospodneti.ch>
Date: Sun, 18 May 2025 09:42:23 -0700
Subject: [PATCH 11/11] radv: min image count patch for Wine Wayland/Path of
 Exile 2 Credit to Glorious Eggroll.

---
 src/amd/vulkan/radv_instance.c       |  2 +-
 src/asahi/vulkan/hk_instance.c       |  2 +-
 src/freedreno/vulkan/tu_device.cc    |  2 +-
 src/intel/vulkan/anv_instance.c      |  2 +-
 src/intel/vulkan_hasvk/anv_device.c  |  2 +-
 src/nouveau/vulkan/nvk_instance.c    |  2 +-
 src/panfrost/vulkan/panvk_instance.c |  2 +-
 src/util/00-mesa-defaults.conf       | 10 ++++++----
 src/util/driconf.h                   |  4 ++--
 src/virtio/vulkan/vn_instance.c      |  2 +-
 src/vulkan/wsi/wsi_common.c          |  2 +-
 src/vulkan/wsi/wsi_common.h          |  4 ++++
 src/vulkan/wsi/wsi_common_private.h  |  3 ++-
 src/vulkan/wsi/wsi_common_wayland.c  | 21 +++++++++++++++++----
 src/vulkan/wsi/wsi_common_x11.c      |  4 ++--
 15 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/src/amd/vulkan/radv_instance.c b/src/amd/vulkan/radv_instance.c
index 6bcf18847bd..2773003911b 100644
--- a/src/amd/vulkan/radv_instance.c
+++ b/src/amd/vulkan/radv_instance.c
@@ -151,7 +151,7 @@ static const struct debug_control trace_options[] = {
 static const driOptionDescription radv_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
       DRI_CONF_ADAPTIVE_SYNC(true)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
diff --git a/src/asahi/vulkan/hk_instance.c b/src/asahi/vulkan/hk_instance.c
index 69e315ff979..b0361133793 100644
--- a/src/asahi/vulkan/hk_instance.c
+++ b/src/asahi/vulkan/hk_instance.c
@@ -80,7 +80,7 @@ hk_EnumerateInstanceExtensionProperties(const char *pLayerName,
 static const driOptionDescription hk_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
       DRI_CONF_ADAPTIVE_SYNC(true)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 2c72aff780d..f9d95c63bbe 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -1671,7 +1671,7 @@ tu_destroy_physical_device(struct vk_physical_device *device)

 static const driOptionDescription tu_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
diff --git a/src/intel/vulkan/anv_instance.c b/src/intel/vulkan/anv_instance.c
index 268a5f3425b..0ab889654ae 100644
--- a/src/intel/vulkan/anv_instance.c
+++ b/src/intel/vulkan/anv_instance.c
@@ -10,7 +10,7 @@
 static const driOptionDescription anv_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
       DRI_CONF_ADAPTIVE_SYNC(true)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
       DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c
index 81f08e50e5d..7e9d43df7ce 100644
--- a/src/intel/vulkan_hasvk/anv_device.c
+++ b/src/intel/vulkan_hasvk/anv_device.c
@@ -65,7 +65,7 @@
 static const driOptionDescription anv_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
       DRI_CONF_ADAPTIVE_SYNC(true)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
       DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
diff --git a/src/nouveau/vulkan/nvk_instance.c b/src/nouveau/vulkan/nvk_instance.c
index 37e7abe1584..29da7e3a0b3 100644
--- a/src/nouveau/vulkan/nvk_instance.c
+++ b/src/nouveau/vulkan/nvk_instance.c
@@ -98,7 +98,7 @@ nvk_init_debug_flags(struct nvk_instance *instance)
 static const driOptionDescription nvk_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
       DRI_CONF_ADAPTIVE_SYNC(true)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
diff --git a/src/panfrost/vulkan/panvk_instance.c b/src/panfrost/vulkan/panvk_instance.c
index 31abc8f4369..8c8f7a8ca0c 100644
--- a/src/panfrost/vulkan/panvk_instance.c
+++ b/src/panfrost/vulkan/panvk_instance.c
@@ -151,7 +151,7 @@ panvk_kmod_free(const struct pan_kmod_allocator *allocator, void *data)
 static const driOptionDescription panvk_dri_options[] = {
    DRI_CONF_SECTION_PERFORMANCE
       DRI_CONF_ADAPTIVE_SYNC(true)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf
index d42526732fa..eaab8afc3e9 100644
--- a/src/util/00-mesa-defaults.conf
+++ b/src/util/00-mesa-defaults.conf
@@ -634,24 +634,24 @@ TODO: document the other workarounds.

         <application name="gfxbench" executable="testfw_app">
             <option name="mesa_glthread_app_profile" value="0" />
-            <option name="vk_x11_override_min_image_count" value="2" />
+            <option name="vk_override_min_image_count" value="2" />
             <option name="vk_wsi_force_bgra8_unorm_first" value="true" />
         </application>

         <application name="Rainbow Six Siege (Vulkan)" executable="RainbowSix_Vulkan.exe">
-            <option name="vk_x11_override_min_image_count" value="2" />
+            <option name="vk_override_min_image_count" value="2" />
             <option name="vk_x11_strict_image_count" value="true" />
         </application>

         <application name="Rainbow Six Extraction (Wine)" executable="R6-Extraction.exe">
-            <option name="vk_x11_override_min_image_count" value="2" />
+            <option name="vk_override_min_image_count" value="2" />
             <option name="vk_x11_strict_image_count" value="true" />
         </application>

         <!-- Workaround for Hades: Vulkan backend of the game is not starting
              if the implementation returns more than 3 swapchain images. -->
         <application name="Hades" executable="Hades.exe">
-            <option name="vk_x11_override_min_image_count" value="3" />
+            <option name="vk_override_min_image_count" value="3" />
             <option name="vk_x11_strict_image_count" value="true" />
         </application>

@@ -712,10 +712,12 @@ TODO: document the other workarounds.

         <application name="Path of Exile" executable="PathOfExile_x64Steam.exe">
             <option name="vk_zero_vram" value="true" />
+            <option name="vk_override_min_image_count" value="3" />
         </application>

         <application name="Path of Exile" executable="PathOfExileSteam.exe">
             <option name="vk_zero_vram" value="true" />
+            <option name="vk_override_min_image_count" value="3" />
         </application>

         <application name="X4 Foundations" executable="X4">
diff --git a/src/util/driconf.h b/src/util/driconf.h
index 8faa15fb560..c94de3f45fe 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -449,8 +449,8 @@
    DRI_CONF_OPT_B(vk_wsi_force_swapchain_to_current_extent, def, \
                   "Force VkSwapchainCreateInfoKHR::imageExtent to be VkSurfaceCapabilities2KHR::currentExtent")

-#define DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(def) \
-   DRI_CONF_OPT_I(vk_x11_override_min_image_count, def, 0, 999, \
+#define DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(def) \
+   DRI_CONF_OPT_I(vk_override_min_image_count, def, 0, 999, \
                   "Override the VkSurfaceCapabilitiesKHR::minImageCount (0 = no override)")

 #define DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(def) \
diff --git a/src/virtio/vulkan/vn_instance.c b/src/virtio/vulkan/vn_instance.c
index 1942d77f67c..23c8e19188c 100644
--- a/src/virtio/vulkan/vn_instance.c
+++ b/src/virtio/vulkan/vn_instance.c
@@ -70,8 +70,8 @@ static const struct vk_instance_extension_table
 static const driOptionDescription vn_dri_options[] = {
    /* clang-format off */
    DRI_CONF_SECTION_PERFORMANCE
+      DRI_CONF_VK_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
-      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
       DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
       DRI_CONF_VENUS_IMPLICIT_FENCING(false)
diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
index f78e4baa22a..047d5dcdeaf 100644
--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -220,7 +220,7 @@ wsi_device_init(struct wsi_device *wsi,
 #endif

 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
-   result = wsi_wl_init_wsi(wsi, alloc, pdevice);
+   result = wsi_wl_init_wsi(wsi, alloc, pdevice, dri_options);
    if (result != VK_SUCCESS)
       goto fail;
 #endif
diff --git a/src/vulkan/wsi/wsi_common.h b/src/vulkan/wsi/wsi_common.h
index 44c81ccddf0..a97e7c2a948 100644
--- a/src/vulkan/wsi/wsi_common.h
+++ b/src/vulkan/wsi/wsi_common.h
@@ -209,6 +209,10 @@ struct wsi_device {
    struct {
       /* Don't use the commit-timing protocol for pacing */
       bool disable_timestamps;
+
+      /* Override the minimum number of images on the swapchain.
+       * 0 = no override */
+      uint32_t override_minImageCount;
    } wayland;

    /*
diff --git a/src/vulkan/wsi/wsi_common_private.h b/src/vulkan/wsi/wsi_common_private.h
index f138fee5519..1d1b55ac7e5 100644
--- a/src/vulkan/wsi/wsi_common_private.h
+++ b/src/vulkan/wsi/wsi_common_private.h
@@ -420,7 +420,8 @@ void wsi_x11_finish_wsi(struct wsi_device *wsi_device,
                         const VkAllocationCallbacks *alloc);
 VkResult wsi_wl_init_wsi(struct wsi_device *wsi_device,
                          const VkAllocationCallbacks *alloc,
-                         VkPhysicalDevice physical_device);
+                         VkPhysicalDevice physical_device,
+                         const struct driOptionCache *dri_options);
 void wsi_wl_finish_wsi(struct wsi_device *wsi_device,
                        const VkAllocationCallbacks *alloc);
 VkResult wsi_win32_init_wsi(struct wsi_device *wsi_device,
diff --git a/src/vulkan/wsi/wsi_common_wayland.c b/src/vulkan/wsi/wsi_common_wayland.c
index dd15d03846b..df68d2ea006 100644
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -58,6 +58,7 @@
 #include <util/u_dynarray.h>
 #include <util/anon_file.h>
 #include <util/os_time.h>
+#include <util/xmlconfig.h>

 #include <loader/loader_wayland_helper.h>

@@ -1668,9 +1669,12 @@ wsi_wl_surface_get_support(VkIcdSurfaceBase *surface,
 #define WSI_WL_DEFAULT_NUM_IMAGES 3

 static uint32_t
-wsi_wl_surface_get_min_image_count(struct wsi_wl_display *display,
+wsi_wl_surface_get_min_image_count(struct wsi_device *wsi_device, struct wsi_wl_display *display,
                                    const VkSurfacePresentModeEXT *present_mode)
 {
+   if (wsi_device->wayland.override_minImageCount)
+      return wsi_device->wayland.override_minImageCount;
+
    if (present_mode) {
       return present_mode->presentMode == VK_PRESENT_MODE_MAILBOX_KHR ?
              WSI_WL_BUMPED_NUM_IMAGES : WSI_WL_DEFAULT_NUM_IMAGES;
@@ -1719,7 +1723,7 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
       display = &temp_display;
    }

-   caps->minImageCount = wsi_wl_surface_get_min_image_count(display, present_mode);
+   caps->minImageCount = wsi_wl_surface_get_min_image_count(wsi_device, display, present_mode);

    if (!wsi_wl_surface->display)
       wsi_wl_display_finish(&temp_display);
@@ -3481,7 +3485,7 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
       const VkSurfacePresentModeEXT mode =
             { VK_STRUCTURE_TYPE_SURFACE_PRESENT_MODE_EXT, NULL, pCreateInfo->presentMode };

-      uint32_t min_images = wsi_wl_surface_get_min_image_count(wsi_wl_surface->display, &mode);
+      uint32_t min_images = wsi_wl_surface_get_min_image_count(wsi_device, wsi_wl_surface->display, &mode);
       bool requires_image_count_bump = min_images == WSI_WL_BUMPED_NUM_IMAGES;
       if (requires_image_count_bump)
          num_images = MAX2(min_images, num_images);
@@ -3676,7 +3680,8 @@ fail:
 VkResult
 wsi_wl_init_wsi(struct wsi_device *wsi_device,
                 const VkAllocationCallbacks *alloc,
-                VkPhysicalDevice physical_device)
+                VkPhysicalDevice physical_device,
+                const struct driOptionCache *dri_options)
 {
    struct wsi_wayland *wsi;
    VkResult result;
@@ -3702,6 +3707,14 @@ wsi_wl_init_wsi(struct wsi_device *wsi_device,

    wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND] = &wsi->base;

+   if (dri_options)
+   {
+      if (driCheckOption(dri_options, "vk_override_min_image_count", DRI_INT)) {
+         wsi_device->wayland.override_minImageCount =
+            driQueryOptioni(dri_options, "vk_override_min_image_count");
+      }
+   }
+
    return VK_SUCCESS;

 fail:
diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
index 2edb7bf2bf3..0f9e2a422c0 100644
--- a/src/vulkan/wsi/wsi_common_x11.c
+++ b/src/vulkan/wsi/wsi_common_x11.c
@@ -2924,9 +2924,9 @@ wsi_x11_init_wsi(struct wsi_device *wsi_device,
    }

    if (dri_options) {
-      if (driCheckOption(dri_options, "vk_x11_override_min_image_count", DRI_INT)) {
+      if (driCheckOption(dri_options, "vk_override_min_image_count", DRI_INT)) {
          wsi_device->x11.override_minImageCount =
-            driQueryOptioni(dri_options, "vk_x11_override_min_image_count");
+            driQueryOptioni(dri_options, "vk_override_min_image_count");
       }
       if (driCheckOption(dri_options, "vk_x11_strict_image_count", DRI_BOOL)) {
          wsi_device->x11.strict_imageCount =
--
2.50.1