radv: replace RADV_TRACE_FILE by RADV_DEBUG=hang
The trace file will be dumped as part of the hang report into
$HOME/radv_dumps_<pid>/trace.log if a GPU hang is detected.
The old and famous RADV_TRACE_FILE envvar is now deprecated: setting it prints a deprecation warning and aborts.
Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7233>
diff --git a/docs/envvars.rst b/docs/envvars.rst
index b325746..672b565 100644
--- a/docs/envvars.rst
+++ b/docs/envvars.rst
@@ -554,6 +554,9 @@
``forcecompress``
Enables DCC,FMASK,CMASK,HTILE in situations where the driver supports it
but normally does not deem it beneficial.
+ ``hang``
+ enable GPU hang detection and dump a report to $HOME/radv_dumps_<pid>
+ if a GPU hang is detected
``info``
show GPU-related information
``metashaders``
@@ -624,8 +627,6 @@
``RADV_TEX_ANISO``
force anisotropy filter (up to 16)
-``RADV_TRACE_FILE``
- generate cmdbuffer tracefiles when a GPU hang is detected
``ACO_DEBUG``
a comma-separated list of named flags, which do various things:
diff --git a/src/amd/vulkan/radv_debug.c b/src/amd/vulkan/radv_debug.c
index c5ab538..7f80381 100644
--- a/src/amd/vulkan/radv_debug.c
+++ b/src/amd/vulkan/radv_debug.c
@@ -83,19 +83,10 @@
}
static void
-radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs)
+radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
{
- const char *filename = getenv("RADV_TRACE_FILE");
- FILE *f = fopen(filename, "w");
-
- if (!f) {
- fprintf(stderr, "Failed to write trace dump to %s\n", filename);
- return;
- }
-
fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
device->ws->cs_dump(cs, f, (const int*)device->trace_id_ptr, 2);
- fclose(f);
}
static void
@@ -625,8 +616,6 @@
fprintf(stderr, "radv: GPU hang detected...\n");
- radv_dump_trace(queue->device, cs);
-
/* Create a directory into $HOME/radv_dumps_<pid> to save various
* debugging info about that GPU hang.
*/
@@ -638,6 +627,14 @@
abort();
}
+ /* Dump trace file. */
+ snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
+ f = fopen(dump_path, "w+");
+ if (f) {
+ radv_dump_trace(queue->device, cs, f);
+ fclose(f);
+ }
+
/* Dump pipeline state. */
snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
f = fopen(dump_path, "w+");
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 1037164..0985be3 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -57,6 +57,7 @@
RADV_DEBUG_DISCARD_TO_DEMOTE = 1 << 26,
RADV_DEBUG_LLVM = 1 << 27,
RADV_DEBUG_FORCE_COMPRESS = 1 << 28,
+ RADV_DEBUG_HANG = 1 << 29,
};
enum {
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index c29006f..ce58e99 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -530,6 +530,7 @@
{"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
{"llvm", RADV_DEBUG_LLVM},
{"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
+ {"hang", RADV_DEBUG_HANG},
{NULL, 0}
};
@@ -2794,19 +2795,25 @@
device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
if (getenv("RADV_TRACE_FILE")) {
- const char *filename = getenv("RADV_TRACE_FILE");
+ fprintf(stderr, "***********************************************************************************\n");
+ fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
+ fprintf(stderr, "***********************************************************************************\n");
+ abort();
+ }
+ if (device->instance->debug_flags & RADV_DEBUG_HANG) {
+ /* Enable GPU hang detection and dump logs if a GPU hang is
+ * detected.
+ */
keep_shader_info = true;
if (!radv_init_trace(device))
goto fail;
fprintf(stderr, "*****************************************************************************\n");
- fprintf(stderr, "* WARNING: RADV_TRACE_FILE is costly and should only be used for debugging! *\n");
+ fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
fprintf(stderr, "*****************************************************************************\n");
- fprintf(stderr, "Trace file will be dumped to %s\n", filename);
-
/* Wait for idle after every draw/dispatch to identify the
* first bad call.
*/