| /************************************************************************** |
| * |
| * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the |
| * "Software"), to deal in the Software without restriction, including |
| * without limitation the rights to use, copy, modify, merge, publish, |
| * distribute, sub license, and/or sell copies of the Software, and to |
| * permit persons to whom the Software is furnished to do so, subject to |
| * the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the |
| * next paragraph) shall be included in all copies or substantial portions |
| * of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
| * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
| * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
| * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
| * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
| * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| * |
| **************************************************************************/ |
| |
| #include <search.h> |
| #include <glib.h> |
| |
| #include "gpgpu_fill.h" |
| #include "huc_copy.h" |
| #include "i915/gem_create.h" |
| #include "i915/gem_mman.h" |
| #include "intel_blt.h" |
| #include "igt_aux.h" |
| #include "igt_syncobj.h" |
| #include "intel_batchbuffer.h" |
| #include "intel_bufops.h" |
| #include "intel_chipset.h" |
| #include "media_fill.h" |
| #include "media_spin.h" |
| #include "sw_sync.h" |
| #include "veboxcopy.h" |
| #include "xe/xe_ioctl.h" |
| #include "xe/xe_query.h" |
| |
| #define BCS_SWCTRL 0x22200 |
| #define BCS_SRC_Y (1 << 0) |
| #define BCS_DST_Y (1 << 1) |
| |
| /** |
| * SECTION:intel_batchbuffer |
| * @short_description: Batchbuffer and blitter support |
| * @title: Batch Buffer |
| * @include: igt.h |
| * |
| * Note that this library's header pulls in the [i-g-t core](igt-gpu-tools-i-g-t-core.html) |
| * library as a dependency. |
| */ |
| |
| static bool intel_bb_do_tracking; |
| static IGT_LIST_HEAD(intel_bb_list); |
| static pthread_mutex_t intel_bb_list_lock = PTHREAD_MUTEX_INITIALIZER; |
| |
| #define CMD_POLY_STIPPLE_OFFSET 0x7906 |
| |
| #define CHECK_RANGE(x) do { \ |
| igt_assert_lte(0, (x)); \ |
| igt_assert_lt((x), (1 << 15)); \ |
| } while (0) |
| |
| /* |
| * Pitches are in bytes if the surfaces are linear, in dwords |
| * otherwise. |
| */ |
| static uint32_t fast_copy_pitch(unsigned int stride, unsigned int tiling) |
| { |
| if (tiling != I915_TILING_NONE) |
| return stride / 4; |
| else |
| return stride; |
| } |
| |
| uint32_t fast_copy_dword0(unsigned int src_tiling, |
| unsigned int dst_tiling) |
| { |
| uint32_t dword0 = 0; |
| |
| dword0 |= XY_FAST_COPY_BLT; |
| |
| switch (src_tiling) { |
| case I915_TILING_X: |
| dword0 |= XY_FAST_COPY_SRC_TILING_X; |
| break; |
| case I915_TILING_Y: |
| case I915_TILING_4: |
| case I915_TILING_Yf: |
| dword0 |= XY_FAST_COPY_SRC_TILING_Yb_Yf; |
| break; |
| case I915_TILING_Ys: |
| dword0 |= XY_FAST_COPY_SRC_TILING_Ys; |
| break; |
| case I915_TILING_NONE: |
| default: |
| break; |
| } |
| |
| switch (dst_tiling) { |
| case I915_TILING_X: |
| dword0 |= XY_FAST_COPY_DST_TILING_X; |
| break; |
| case I915_TILING_Y: |
| case I915_TILING_4: |
| case I915_TILING_Yf: |
| dword0 |= XY_FAST_COPY_DST_TILING_Yb_Yf; |
| break; |
| case I915_TILING_Ys: |
| dword0 |= XY_FAST_COPY_DST_TILING_Ys; |
| break; |
| case I915_TILING_NONE: |
| default: |
| break; |
| } |
| |
| return dword0; |
| } |
| |
| static bool new_tile_y_format(unsigned int tiling) |
| { |
| return tiling == T_YFMAJOR || tiling == T_TILE4; |
| } |
| |
| uint32_t fast_copy_dword1(int fd, unsigned int src_tiling, |
| unsigned int dst_tiling, |
| int bpp) |
| { |
| uint32_t dword1 = 0; |
| |
| if (blt_fast_copy_supports_tiling(fd, T_YMAJOR)) { |
| dword1 |= new_tile_y_format(src_tiling) |
| ? XY_FAST_COPY_SRC_TILING_Yf : 0; |
| dword1 |= new_tile_y_format(dst_tiling) |
| ? XY_FAST_COPY_DST_TILING_Yf : 0; |
| } else { |
| /* Always set bits for platforms that don't support legacy TileY */ |
| dword1 |= XY_FAST_COPY_SRC_TILING_Yf | XY_FAST_COPY_DST_TILING_Yf; |
| } |
| |
| switch (bpp) { |
| case 8: |
| dword1 |= XY_FAST_COPY_COLOR_DEPTH_8; |
| break; |
| case 16: |
| dword1 |= XY_FAST_COPY_COLOR_DEPTH_16; |
| break; |
| case 32: |
| dword1 |= XY_FAST_COPY_COLOR_DEPTH_32; |
| break; |
| case 64: |
| dword1 |= XY_FAST_COPY_COLOR_DEPTH_64; |
| break; |
| case 128: |
| dword1 |= XY_FAST_COPY_COLOR_DEPTH_128; |
| break; |
| default: |
| igt_assert(0); |
| } |
| |
| return dword1; |
| } |
| |
| static void |
| fill_relocation(struct drm_i915_gem_relocation_entry *reloc, |
| uint32_t gem_handle, uint64_t presumed_offset, |
| uint32_t delta, /* in bytes */ |
| uint32_t offset, /* in dwords */ |
| uint32_t read_domains, uint32_t write_domains) |
| { |
| reloc->target_handle = gem_handle; |
| reloc->delta = delta; |
| reloc->offset = offset * sizeof(uint32_t); |
| reloc->presumed_offset = presumed_offset; |
| reloc->read_domains = read_domains; |
| reloc->write_domain = write_domains; |
| } |
| |
| static void |
| fill_object(struct drm_i915_gem_exec_object2 *obj, |
| uint32_t gem_handle, uint64_t gem_offset, |
| struct drm_i915_gem_relocation_entry *relocs, uint32_t count) |
| { |
| memset(obj, 0, sizeof(*obj)); |
| obj->handle = gem_handle; |
| obj->offset = gem_offset; |
| obj->relocation_count = count; |
| obj->relocs_ptr = to_user_pointer(relocs); |
| } |
| |
| static uint32_t find_engine(const intel_ctx_cfg_t *cfg, unsigned int class) |
| { |
| unsigned int i; |
| uint32_t engine_id = -1; |
| |
| for (i = 0; i < cfg->num_engines; i++) { |
| if (cfg->engines[i].engine_class == class) |
| engine_id = i; |
| } |
| |
| igt_assert_f(engine_id != -1, "Requested engine not found!\n"); |
| |
| return engine_id; |
| } |
| |
| static void exec_blit(int fd, |
| struct drm_i915_gem_exec_object2 *objs, |
| uint32_t count, uint32_t ctx, |
| const intel_ctx_cfg_t *cfg) |
| { |
| struct drm_i915_gem_execbuffer2 exec; |
| uint32_t devid = intel_get_drm_devid(fd); |
| uint32_t blt_id = HAS_BLT_RING(devid) ? I915_EXEC_BLT : I915_EXEC_DEFAULT; |
| |
| if (cfg) |
| blt_id = find_engine(cfg, I915_ENGINE_CLASS_COPY); |
| |
| exec = (struct drm_i915_gem_execbuffer2) { |
| .buffers_ptr = to_user_pointer(objs), |
| .buffer_count = count, |
| .flags = blt_id | I915_EXEC_NO_RELOC, |
| .rsvd1 = ctx, |
| }; |
| |
| gem_execbuf(fd, &exec); |
| } |
| |
| static uint32_t src_copy_dword0(uint32_t src_tiling, uint32_t dst_tiling, |
| uint32_t bpp, uint32_t device_gen) |
| { |
| uint32_t dword0 = 0; |
| |
| dword0 |= XY_SRC_COPY_BLT_CMD; |
| if (bpp == 32) |
| dword0 |= XY_SRC_COPY_BLT_WRITE_RGB | |
| XY_SRC_COPY_BLT_WRITE_ALPHA; |
| |
| if (device_gen >= 4 && src_tiling) |
| dword0 |= XY_SRC_COPY_BLT_SRC_TILED; |
| if (device_gen >= 4 && dst_tiling) |
| dword0 |= XY_SRC_COPY_BLT_DST_TILED; |
| |
| return dword0; |
| } |
| |
| static uint32_t src_copy_dword1(uint32_t dst_pitch, uint32_t bpp) |
| { |
| uint32_t dword1 = 0; |
| |
| switch (bpp) { |
| case 8: |
| break; |
| case 16: |
| dword1 |= 1 << 24; /* Only support 565 color */ |
| break; |
| case 32: |
| dword1 |= 3 << 24; |
| break; |
| default: |
| igt_assert(0); |
| } |
| |
| dword1 |= 0xcc << 16; |
| dword1 |= dst_pitch; |
| |
| return dword1; |
| } |
| |
| /** |
| * igt_blitter_copy: |
| * @fd: file descriptor of the i915 driver |
| * @ahnd: handle to an allocator |
| * @ctx: context within which the copy blit is executed |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @src_handle: GEM handle of the source buffer |
| * @src_delta: offset into the source GEM bo, in bytes |
| * @src_stride: Stride (in bytes) of the source buffer |
| * @src_tiling: Tiling mode of the source buffer |
| * @src_x: X coordinate of the source region to copy |
| * @src_y: Y coordinate of the source region to copy |
| * @src_size: size of the src bo required for allocator and softpin |
| * @width: Width of the region to copy |
| * @height: Height of the region to copy |
| * @bpp: source and destination bits per pixel |
| * @dst_handle: GEM handle of the destination buffer |
| * @dst_delta: offset into the destination GEM bo, in bytes |
| * @dst_stride: Stride (in bytes) of the destination buffer |
| * @dst_tiling: Tiling mode of the destination buffer |
| * @dst_x: X coordinate of destination |
| * @dst_y: Y coordinate of destination |
| * @dst_size: size of the dst bo required for allocator and softpin |
| * |
| * Wrapper API that calls the appropriate blitter copy function for the |
| * running device. |
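| * |
| * A minimal sketch (hypothetical handles and geometry; a linear 32bpp |
| * copy starting at offset 0 within both objects): |
| * |
| * |[<!-- language="C" --> |
| * igt_blitter_copy(fd, ahnd, 0, NULL, |
| *                  src, 0, width * 4, I915_TILING_NONE, 0, 0, src_size, |
| *                  width, height, 32, |
| *                  dst, 0, width * 4, I915_TILING_NONE, 0, 0, dst_size); |
| * ]| |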
| */ |
| |
| void igt_blitter_copy(int fd, |
| uint64_t ahnd, |
| uint32_t ctx, |
| const intel_ctx_cfg_t *cfg, |
| /* src */ |
| uint32_t src_handle, |
| uint32_t src_delta, |
| uint32_t src_stride, |
| uint32_t src_tiling, |
| uint32_t src_x, uint32_t src_y, |
| uint64_t src_size, |
| /* size */ |
| uint32_t width, uint32_t height, |
| /* bpp */ |
| uint32_t bpp, |
| /* dst */ |
| uint32_t dst_handle, |
| uint32_t dst_delta, |
| uint32_t dst_stride, |
| uint32_t dst_tiling, |
| uint32_t dst_x, uint32_t dst_y, |
| uint64_t dst_size) |
| { |
| uint32_t devid; |
| |
| devid = intel_get_drm_devid(fd); |
| |
| if (intel_graphics_ver(devid) >= IP_VER(12, 60)) |
| igt_blitter_fast_copy__raw(fd, ahnd, ctx, NULL, |
| src_handle, src_delta, |
| src_stride, src_tiling, |
| src_x, src_y, src_size, |
| width, height, bpp, |
| dst_handle, dst_delta, |
| dst_stride, dst_tiling, |
| dst_x, dst_y, dst_size); |
| else |
| igt_blitter_src_copy(fd, ahnd, ctx, NULL, |
| src_handle, src_delta, |
| src_stride, src_tiling, |
| src_x, src_y, src_size, |
| width, height, bpp, |
| dst_handle, dst_delta, |
| dst_stride, dst_tiling, |
| dst_x, dst_y, dst_size); |
| } |
| /** |
| * igt_blitter_src_copy: |
| * @fd: file descriptor of the i915 driver |
| * @ahnd: handle to an allocator |
| * @ctx: context within which the copy blit is executed |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @src_handle: GEM handle of the source buffer |
| * @src_delta: offset into the source GEM bo, in bytes |
| * @src_stride: Stride (in bytes) of the source buffer |
| * @src_tiling: Tiling mode of the source buffer |
| * @src_x: X coordinate of the source region to copy |
| * @src_y: Y coordinate of the source region to copy |
| * @src_size: size of the src bo required for allocator and softpin |
| * @width: Width of the region to copy |
| * @height: Height of the region to copy |
| * @bpp: source and destination bits per pixel |
| * @dst_handle: GEM handle of the destination buffer |
| * @dst_delta: offset into the destination GEM bo, in bytes |
| * @dst_stride: Stride (in bytes) of the destination buffer |
| * @dst_tiling: Tiling mode of the destination buffer |
| * @dst_x: X coordinate of destination |
| * @dst_y: Y coordinate of destination |
| * @dst_size: size of the dst bo required for allocator and softpin |
| * |
| * Copy @src into @dst using the XY_SRC_COPY blit command. |
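| * |
| * A short sketch with hypothetical handles and strides; when either side |
| * is Y-tiled the function programs BCS_SWCTRL around the blit itself: |
| * |
| * |[<!-- language="C" --> |
| * igt_blitter_src_copy(fd, ahnd, 0, NULL, |
| *                      src, 0, src_stride, I915_TILING_Y, 0, 0, src_size, |
| *                      width, height, 32, |
| *                      dst, 0, dst_stride, I915_TILING_NONE, 0, 0, dst_size); |
| * ]| |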
| */ |
| void igt_blitter_src_copy(int fd, |
| uint64_t ahnd, |
| uint32_t ctx, |
| const intel_ctx_cfg_t *cfg, |
| /* src */ |
| uint32_t src_handle, |
| uint32_t src_delta, |
| uint32_t src_stride, |
| uint32_t src_tiling, |
| uint32_t src_x, uint32_t src_y, |
| uint64_t src_size, |
| |
| /* size */ |
| uint32_t width, uint32_t height, |
| |
| /* bpp */ |
| uint32_t bpp, |
| |
| /* dst */ |
| uint32_t dst_handle, |
| uint32_t dst_delta, |
| uint32_t dst_stride, |
| uint32_t dst_tiling, |
| uint32_t dst_x, uint32_t dst_y, |
| uint64_t dst_size) |
| { |
| uint32_t batch[32]; |
| struct drm_i915_gem_exec_object2 objs[3]; |
| struct drm_i915_gem_relocation_entry relocs[2]; |
| uint32_t batch_handle; |
| uint32_t src_pitch, dst_pitch; |
| uint32_t dst_reloc_offset, src_reloc_offset; |
| uint32_t gen = intel_gen(intel_get_drm_devid(fd)); |
| uint64_t batch_offset, src_offset, dst_offset; |
| const bool has_64b_reloc = gen >= 8; |
| int i = 0; |
| |
| batch_handle = gem_create(fd, 4096); |
| if (ahnd) { |
| src_offset = get_offset(ahnd, src_handle, src_size, 0); |
| dst_offset = get_offset(ahnd, dst_handle, dst_size, 0); |
| batch_offset = get_offset(ahnd, batch_handle, 4096, 0); |
| } else { |
| src_offset = 16 << 20; |
| dst_offset = ALIGN(src_offset + src_size, 1 << 20); |
| batch_offset = ALIGN(dst_offset + dst_size, 1 << 20); |
| } |
| |
| memset(batch, 0, sizeof(batch)); |
| |
| igt_assert((src_tiling == I915_TILING_NONE) || |
| (src_tiling == I915_TILING_X) || |
| (src_tiling == I915_TILING_Y)); |
| igt_assert((dst_tiling == I915_TILING_NONE) || |
| (dst_tiling == I915_TILING_X) || |
| (dst_tiling == I915_TILING_Y)); |
| |
| src_pitch = (gen >= 4 && src_tiling) ? src_stride / 4 : src_stride; |
| dst_pitch = (gen >= 4 && dst_tiling) ? dst_stride / 4 : dst_stride; |
| |
| if (bpp == 64) { |
| bpp /= 2; |
| width *= 2; |
| } |
| |
| CHECK_RANGE(src_x); CHECK_RANGE(src_y); |
| CHECK_RANGE(dst_x); CHECK_RANGE(dst_y); |
| CHECK_RANGE(width); CHECK_RANGE(height); |
| CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height); |
| CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height); |
| CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch); |
| |
| if ((src_tiling | dst_tiling) >= I915_TILING_Y) { |
| unsigned int mask; |
| |
| batch[i++] = MI_LOAD_REGISTER_IMM(1); |
| batch[i++] = BCS_SWCTRL; |
| |
| mask = (BCS_SRC_Y | BCS_DST_Y) << 16; |
| if (src_tiling == I915_TILING_Y) |
| mask |= BCS_SRC_Y; |
| if (dst_tiling == I915_TILING_Y) |
| mask |= BCS_DST_Y; |
| batch[i++] = mask; |
| } |
| |
| batch[i] = src_copy_dword0(src_tiling, dst_tiling, bpp, gen); |
| batch[i++] |= 6 + 2 * has_64b_reloc; |
| batch[i++] = src_copy_dword1(dst_pitch, bpp); |
| batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */ |
| batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */ |
| dst_reloc_offset = i; |
| batch[i++] = dst_offset + dst_delta; /* dst address lower bits */ |
| if (has_64b_reloc) |
| batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */ |
| batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */ |
| batch[i++] = src_pitch; |
| src_reloc_offset = i; |
| batch[i++] = src_offset + src_delta; /* src address lower bits */ |
| if (has_64b_reloc) |
| batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */ |
| |
| if ((src_tiling | dst_tiling) >= I915_TILING_Y) { |
| igt_assert(gen >= 6); |
| batch[i++] = MI_FLUSH_DW_CMD | 2; |
| batch[i++] = 0; |
| batch[i++] = 0; |
| batch[i++] = 0; |
| |
| batch[i++] = MI_LOAD_REGISTER_IMM(1); |
| batch[i++] = BCS_SWCTRL; |
| batch[i++] = (BCS_SRC_Y | BCS_DST_Y) << 16; |
| } |
| |
| batch[i++] = MI_BATCH_BUFFER_END; |
| batch[i++] = MI_NOOP; |
| |
| igt_assert(i <= ARRAY_SIZE(batch)); |
| |
| gem_write(fd, batch_handle, 0, batch, sizeof(batch)); |
| |
| fill_relocation(&relocs[0], dst_handle, dst_offset, |
| dst_delta, dst_reloc_offset, |
| I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); |
| fill_relocation(&relocs[1], src_handle, src_offset, |
| src_delta, src_reloc_offset, |
| I915_GEM_DOMAIN_RENDER, 0); |
| |
| fill_object(&objs[0], dst_handle, dst_offset, NULL, 0); |
| fill_object(&objs[1], src_handle, src_offset, NULL, 0); |
| fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0); |
| |
| objs[0].flags |= EXEC_OBJECT_NEEDS_FENCE | EXEC_OBJECT_WRITE; |
| objs[1].flags |= EXEC_OBJECT_NEEDS_FENCE; |
| |
| if (ahnd) { |
| objs[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; |
| objs[1].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; |
| objs[2].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS; |
| } |
| |
| exec_blit(fd, objs, 3, ctx, cfg); |
| |
| gem_close(fd, batch_handle); |
| } |
| |
| /** |
| * igt_blitter_fast_copy__raw: |
| * @fd: file descriptor of the i915 driver |
| * @ahnd: handle to an allocator |
| * @ctx: context within which the copy blit is executed |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @src_handle: GEM handle of the source buffer |
| * @src_delta: offset into the source GEM bo, in bytes |
| * @src_stride: Stride (in bytes) of the source buffer |
| * @src_tiling: Tiling mode of the source buffer |
| * @src_x: X coordinate of the source region to copy |
| * @src_y: Y coordinate of the source region to copy |
| * @src_size: size of the src bo required for allocator and softpin |
| * @width: Width of the region to copy |
| * @height: Height of the region to copy |
| * @bpp: source and destination bits per pixel |
| * @dst_handle: GEM handle of the destination buffer |
| * @dst_delta: offset into the destination GEM bo, in bytes |
| * @dst_stride: Stride (in bytes) of the destination buffer |
| * @dst_tiling: Tiling mode of the destination buffer |
| * @dst_x: X coordinate of destination |
| * @dst_y: Y coordinate of destination |
| * @dst_size: size of the dst bo required for allocator and softpin |
| * |
| * Like igt_blitter_fast_copy(), but talking to the kernel directly. |
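| * |
| * A short sketch with hypothetical handles; the fast copy blit also |
| * understands the newer tiling modes, e.g. Tile4: |
| * |
| * |[<!-- language="C" --> |
| * igt_blitter_fast_copy__raw(fd, ahnd, 0, NULL, |
| *                            src, 0, src_stride, I915_TILING_4, 0, 0, src_size, |
| *                            width, height, 32, |
| *                            dst, 0, dst_stride, I915_TILING_4, 0, 0, dst_size); |
| * ]| |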
| */ |
| void igt_blitter_fast_copy__raw(int fd, |
| uint64_t ahnd, |
| uint32_t ctx, |
| const intel_ctx_cfg_t *cfg, |
| /* src */ |
| uint32_t src_handle, |
| unsigned int src_delta, |
| unsigned int src_stride, |
| unsigned int src_tiling, |
| unsigned int src_x, unsigned src_y, |
| uint64_t src_size, |
| |
| /* size */ |
| unsigned int width, unsigned int height, |
| |
| /* bpp */ |
| int bpp, |
| |
| /* dst */ |
| uint32_t dst_handle, |
| unsigned dst_delta, |
| unsigned int dst_stride, |
| unsigned int dst_tiling, |
| unsigned int dst_x, unsigned dst_y, |
| uint64_t dst_size) |
| { |
| uint32_t batch[12]; |
| struct drm_i915_gem_exec_object2 objs[3]; |
| struct drm_i915_gem_relocation_entry relocs[2]; |
| uint32_t batch_handle; |
| uint32_t dword0, dword1; |
| uint32_t src_pitch, dst_pitch; |
| uint64_t batch_offset, src_offset, dst_offset; |
| int i = 0; |
| |
| batch_handle = gem_create(fd, 4096); |
| if (ahnd) { |
| src_offset = get_offset(ahnd, src_handle, src_size, 0); |
| dst_offset = get_offset(ahnd, dst_handle, dst_size, 0); |
| batch_offset = get_offset(ahnd, batch_handle, 4096, 0); |
| } else { |
| src_offset = 16 << 20; |
| dst_offset = ALIGN(src_offset + src_size, 1 << 20); |
| batch_offset = ALIGN(dst_offset + dst_size, 1 << 20); |
| } |
| |
| src_pitch = fast_copy_pitch(src_stride, src_tiling); |
| dst_pitch = fast_copy_pitch(dst_stride, dst_tiling); |
| dword0 = fast_copy_dword0(src_tiling, dst_tiling); |
| dword1 = fast_copy_dword1(fd, src_tiling, dst_tiling, bpp); |
| |
| CHECK_RANGE(src_x); CHECK_RANGE(src_y); |
| CHECK_RANGE(dst_x); CHECK_RANGE(dst_y); |
| CHECK_RANGE(width); CHECK_RANGE(height); |
| CHECK_RANGE(src_x + width); CHECK_RANGE(src_y + height); |
| CHECK_RANGE(dst_x + width); CHECK_RANGE(dst_y + height); |
| CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch); |
| |
| batch[i++] = dword0; |
| batch[i++] = dword1 | dst_pitch; |
| batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */ |
| batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */ |
| batch[i++] = dst_offset + dst_delta; /* dst address lower bits */ |
| batch[i++] = (dst_offset + dst_delta) >> 32; /* dst address upper bits */ |
| batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */ |
| batch[i++] = src_pitch; |
| batch[i++] = src_offset + src_delta; /* src address lower bits */ |
| batch[i++] = (src_offset + src_delta) >> 32; /* src address upper bits */ |
| batch[i++] = MI_BATCH_BUFFER_END; |
| batch[i++] = MI_NOOP; |
| |
| igt_assert(i == ARRAY_SIZE(batch)); |
| |
| gem_write(fd, batch_handle, 0, batch, sizeof(batch)); |
| |
| fill_relocation(&relocs[0], dst_handle, dst_offset, dst_delta, 4, |
| I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); |
| fill_relocation(&relocs[1], src_handle, src_offset, src_delta, 8, |
| I915_GEM_DOMAIN_RENDER, 0); |
| |
| fill_object(&objs[0], dst_handle, dst_offset, NULL, 0); |
| objs[0].flags |= EXEC_OBJECT_WRITE; |
| fill_object(&objs[1], src_handle, src_offset, NULL, 0); |
| fill_object(&objs[2], batch_handle, batch_offset, relocs, !ahnd ? 2 : 0); |
| |
| if (ahnd) { |
| objs[0].flags |= EXEC_OBJECT_PINNED; |
| objs[1].flags |= EXEC_OBJECT_PINNED; |
| objs[2].flags |= EXEC_OBJECT_PINNED; |
| } |
| |
| exec_blit(fd, objs, 3, ctx, cfg); |
| |
| gem_close(fd, batch_handle); |
| } |
| |
| /** |
| * igt_get_render_copyfunc: |
| * @devid: pci device id |
| * |
| * Returns: |
| * |
| * The platform-specific render copy function pointer for the device |
| * specified with @devid. Will return NULL when no render copy function is |
| * implemented. |
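| * |
| * Typical use is to skip when no implementation exists, e.g. with a |
| * @devid obtained from intel_get_drm_devid(): |
| * |
| * |[<!-- language="C" --> |
| * igt_render_copyfunc_t rendercopy = igt_get_render_copyfunc(devid); |
| * |
| * igt_require_f(rendercopy, "no render-copy function\n"); |
| * ]| |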
| */ |
| igt_render_copyfunc_t igt_get_render_copyfunc(int devid) |
| { |
| igt_render_copyfunc_t copy = NULL; |
| |
| if (IS_GEN2(devid)) |
| copy = gen2_render_copyfunc; |
| else if (IS_GEN3(devid)) |
| copy = gen3_render_copyfunc; |
| else if (IS_GEN4(devid) || IS_GEN5(devid)) |
| copy = gen4_render_copyfunc; |
| else if (IS_GEN6(devid)) |
| copy = gen6_render_copyfunc; |
| else if (IS_GEN7(devid)) |
| copy = gen7_render_copyfunc; |
| else if (IS_GEN8(devid)) |
| copy = gen8_render_copyfunc; |
| else if (IS_GEN9(devid) || IS_GEN10(devid)) |
| copy = gen9_render_copyfunc; |
| else if (IS_GEN11(devid)) |
| copy = gen11_render_copyfunc; |
| else if (HAS_FLATCCS(devid)) |
| copy = gen12p71_render_copyfunc; |
| else if (IS_METEORLAKE(devid)) |
| copy = mtl_render_copyfunc; |
| else if (IS_GEN12(devid)) |
| copy = gen12_render_copyfunc; |
| |
| return copy; |
| } |
| |
| igt_vebox_copyfunc_t igt_get_vebox_copyfunc(int devid) |
| { |
| igt_vebox_copyfunc_t copy = NULL; |
| |
| if (IS_GEN12(devid)) |
| copy = gen12_vebox_copyfunc; |
| |
| return copy; |
| } |
| |
| igt_render_clearfunc_t igt_get_render_clearfunc(int devid) |
| { |
| if (IS_METEORLAKE(devid)) { |
| return mtl_render_clearfunc; |
| } else if (IS_DG2(devid)) { |
| return gen12p71_render_clearfunc; |
| } else if (IS_GEN12(devid)) { |
| return gen12_render_clearfunc; |
| } else { |
| return NULL; |
| } |
| } |
| |
| /** |
| * igt_get_media_fillfunc: |
| * @devid: pci device id |
| * |
| * Returns: |
| * |
| * The platform-specific media fill function pointer for the device specified |
| * with @devid. Will return NULL when no media fill function is implemented. |
| */ |
| igt_fillfunc_t igt_get_media_fillfunc(int devid) |
| { |
| igt_fillfunc_t fill = NULL; |
| |
| if (intel_graphics_ver(devid) >= IP_VER(12, 50)) { |
| /* PIPELINE_MEDIA is defeatured on these platforms */ |
| } else if (IS_GEN12(devid)) |
| fill = gen12_media_fillfunc; |
| else if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid)) |
| fill = gen9_media_fillfunc; |
| else if (IS_GEN8(devid)) |
| fill = gen8_media_fillfunc; |
| else if (IS_GEN7(devid)) |
| fill = gen7_media_fillfunc; |
| |
| return fill; |
| } |
| |
| igt_vme_func_t igt_get_media_vme_func(int devid) |
| { |
| igt_vme_func_t fill = NULL; |
| const struct intel_device_info *devinfo = intel_get_device_info(devid); |
| |
| if (IS_GEN11(devid) && !devinfo->is_elkhartlake && !devinfo->is_jasperlake) |
| fill = gen11_media_vme_func; |
| |
| return fill; |
| } |
| |
| /** |
| * igt_get_gpgpu_fillfunc: |
| * @devid: pci device id |
| * |
| * Returns: |
| * |
| * The platform-specific gpgpu fill function pointer for the device specified |
| * with @devid. Will return NULL when no gpgpu fill function is implemented. |
| */ |
| igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid) |
| { |
| igt_fillfunc_t fill = NULL; |
| |
| if (intel_graphics_ver(devid) >= IP_VER(12, 60)) |
| fill = xehpc_gpgpu_fillfunc; |
| else if (intel_graphics_ver(devid) >= IP_VER(12, 50)) |
| fill = xehp_gpgpu_fillfunc; |
| else if (IS_GEN12(devid)) |
| fill = gen12_gpgpu_fillfunc; |
| else if (IS_GEN11(devid)) |
| fill = gen11_gpgpu_fillfunc; |
| else if (IS_GEN9(devid) || IS_GEN10(devid)) |
| fill = gen9_gpgpu_fillfunc; |
| else if (IS_GEN8(devid)) |
| fill = gen8_gpgpu_fillfunc; |
| else if (IS_GEN7(devid)) |
| fill = gen7_gpgpu_fillfunc; |
| |
| return fill; |
| } |
| |
| /** |
| * igt_get_media_spinfunc: |
| * @devid: pci device id |
| * |
| * Returns: |
| * |
| * The platform-specific media spin function pointer for the device specified |
| * with @devid. Will return NULL when no media spin function is implemented. |
| */ |
| igt_media_spinfunc_t igt_get_media_spinfunc(int devid) |
| { |
| igt_media_spinfunc_t spin = NULL; |
| |
| if (IS_GEN9(devid)) |
| spin = gen9_media_spinfunc; |
| else if (IS_GEN8(devid)) |
| spin = gen8_media_spinfunc; |
| |
| return spin; |
| } |
| |
| /* Intel batchbuffer v2 */ |
| static bool intel_bb_debug_tree = false; |
| |
| /* |
| * __reallocate_objects: |
| * @ibb: pointer to intel_bb |
| * |
| * Increases the size of the objects array if necessary. |
| */ |
| static void __reallocate_objects(struct intel_bb *ibb) |
| { |
| const uint32_t inc = 4096 / sizeof(*ibb->objects); |
| |
| if (ibb->num_objects == ibb->allocated_objects) { |
| ibb->objects = realloc(ibb->objects, |
| sizeof(*ibb->objects) * |
| (inc + ibb->allocated_objects)); |
| |
| igt_assert(ibb->objects); |
| ibb->allocated_objects += inc; |
| |
| memset(&ibb->objects[ibb->num_objects], 0, |
| inc * sizeof(*ibb->objects)); |
| } |
| } |
| |
| static inline uint64_t __intel_bb_get_offset(struct intel_bb *ibb, |
| uint32_t handle, |
| uint64_t size, |
| uint32_t alignment) |
| { |
| uint64_t offset; |
| |
| if (ibb->enforce_relocs) |
| return 0; |
| |
| offset = intel_allocator_alloc(ibb->allocator_handle, |
| handle, size, alignment); |
| |
| return offset; |
| } |
| |
| /** |
| * __intel_bb_create: |
| * @fd: drm fd - i915 or xe |
| * @ctx: for i915 context id, for xe engine id |
| * @vm: for xe vm_id, unused for i915 |
| * @cfg: for i915 intel_ctx configuration, NULL for default context or legacy mode, |
| * unused for xe |
| * @size: size of the batchbuffer |
| * @do_relocs: use relocations or allocator |
| * @allocator_type: allocator type, must be INTEL_ALLOCATOR_NONE for relocations |
| * |
| * intel-bb works in one of two modes - with relocations or with an |
| * allocator (currently the RELOC and SIMPLE allocators are implemented). |
| * A short description of how each mode maintains addresses follows. |
| * |
| * In both scenarios the generic rule is that intel-bb keeps objects and |
| * their offsets in an internal cache and reuses them in subsequent execs. |
| * |
| * 1. intel-bb with relocations (i915 only) |
| * |
| * Creating a new intel-bb implicitly adds its handle to the cache and |
| * sets its address to 0. Objects added to intel-bb later also have |
| * address 0 set for the first run. After calling execbuf the cache is |
| * updated with the new addresses. As intel-bb works in reloc mode the |
| * addresses are only a suggestion to the driver and we cannot be sure |
| * they won't change at the next exec. |
| * |
| * 2. with allocator (i915 or xe) |
| * |
| * This mode is valid only for ppgtt. Addresses are acquired from the |
| * allocator and softpinned (i915) or vm-bound (xe). The intel-bb cache |
| * must then be coherent with the allocator (simple is coherent, reloc |
| * only partially [it doesn't support address reservation]). |
| * When we reset an intel-bb with cache purging it has to reacquire the |
| * addresses from the allocator (the allocator should return the same |
| * address - which is true for the simple and reloc allocators). |
| * |
| * If we reset without purging caches we use the addresses from the |
| * intel-bb cache during execbuf object construction. |
| * |
| * If we reset with purging caches the allocator entries are freed as well. |
| * |
| * __intel_bb_create checks if a context configuration for intel_ctx_t was |
| * passed in. If this is the case, it copies the information over to the |
| * newly created batch buffer. |
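| * |
| * As a sketch, the two modes map onto the public constructors roughly |
| * like this: |
| * |
| * |[<!-- language="C" --> |
| * struct intel_bb *reloc_ibb = intel_bb_create_with_relocs(i915, 4096); |
| * struct intel_bb *alloc_ibb = intel_bb_create_with_allocator(fd, 0, 0, |
| *                                     NULL, 4096, INTEL_ALLOCATOR_SIMPLE); |
| * ]| |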
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| static struct intel_bb * |
| __intel_bb_create(int fd, uint32_t ctx, uint32_t vm, const intel_ctx_cfg_t *cfg, |
| uint32_t size, bool do_relocs, |
| uint64_t start, uint64_t end, |
| uint8_t allocator_type, enum allocator_strategy strategy) |
| { |
| struct drm_i915_gem_exec_object2 *object; |
| struct intel_bb *ibb = calloc(1, sizeof(*ibb)); |
| |
| igt_assert(ibb); |
| |
| ibb->devid = intel_get_drm_devid(fd); |
| ibb->gen = intel_gen(ibb->devid); |
| ibb->ctx = ctx; |
| |
| ibb->fd = fd; |
| ibb->driver = is_i915_device(fd) ? INTEL_DRIVER_I915 : |
| is_xe_device(fd) ? INTEL_DRIVER_XE : 0; |
| igt_assert(ibb->driver); |
| |
| /* |
| * If we don't have full ppgtt the driver can change our addresses, |
| * making the allocator useless in this case. Just enforce relocations |
| * for such gens and don't use the allocator at all. |
| */ |
| if (ibb->driver == INTEL_DRIVER_I915) { |
| ibb->uses_full_ppgtt = gem_uses_full_ppgtt(fd); |
| ibb->alignment = gem_detect_safe_alignment(fd); |
| ibb->gtt_size = gem_aperture_size(fd); |
| ibb->handle = gem_create(fd, size); |
| |
| if (!ibb->uses_full_ppgtt) |
| do_relocs = true; |
| |
| /* |
| * In softpin mode the allocator has full control over offset allocation, |
| * so we don't want the kernel to interfere with it. |
| */ |
| if (do_relocs) { |
| ibb->allows_obj_alignment = gem_allows_obj_alignment(fd); |
| allocator_type = INTEL_ALLOCATOR_NONE; |
| } else { |
| /* Use a safe start offset instead of assuming 0x0 is safe */ |
| start = max_t(uint64_t, start, gem_detect_safe_start_offset(fd)); |
| |
| /* if relocs are set we won't use an allocator */ |
| ibb->allocator_handle = |
| intel_allocator_open_full(fd, ctx, start, end, |
| allocator_type, |
| strategy, 0); |
| } |
| |
| ibb->vm_id = 0; |
| } else { |
| igt_assert(!do_relocs); |
| |
| ibb->alignment = xe_get_default_alignment(fd); |
| size = ALIGN(size, ibb->alignment); |
| ibb->handle = xe_bo_create_flags(fd, 0, size, visible_vram_if_possible(fd, 0)); |
| |
| /* Limit to 48-bit due to MI_* address limitation */ |
| ibb->gtt_size = 1ull << min_t(uint32_t, xe_va_bits(fd), 48); |
| end = ibb->gtt_size; |
| |
| if (!vm) { |
| igt_assert_f(!ctx, "No vm provided for engine"); |
| vm = xe_vm_create(fd, DRM_XE_VM_CREATE_ASYNC_BIND_OPS, 0); |
| } |
| |
| ibb->uses_full_ppgtt = true; |
| ibb->allocator_handle = |
| intel_allocator_open_full(fd, vm, start, end, |
| allocator_type, strategy, |
| ibb->alignment); |
| ibb->vm_id = vm; |
| ibb->last_engine = ~0U; |
| } |
| |
| ibb->allocator_type = allocator_type; |
| ibb->allocator_strategy = strategy; |
| ibb->allocator_start = start; |
| ibb->allocator_end = end; |
| ibb->enforce_relocs = do_relocs; |
| |
| ibb->size = size; |
| ibb->batch = calloc(1, size); |
| igt_assert(ibb->batch); |
| ibb->ptr = ibb->batch; |
| ibb->fence = -1; |
| |
| /* Cache context configuration */ |
| if (cfg) { |
| ibb->cfg = malloc(sizeof(*cfg)); |
| igt_assert(ibb->cfg); |
| memcpy(ibb->cfg, cfg, sizeof(*cfg)); |
| } |
| |
| if ((ibb->gtt_size - 1) >> 32) |
| ibb->supports_48b_address = true; |
| |
| object = intel_bb_add_object(ibb, ibb->handle, ibb->size, |
| INTEL_BUF_INVALID_ADDRESS, ibb->alignment, |
| false); |
| ibb->batch_offset = object->offset; |
| |
| IGT_INIT_LIST_HEAD(&ibb->intel_bufs); |
| |
| ibb->refcount = 1; |
| |
| if (intel_bb_do_tracking && ibb->allocator_type != INTEL_ALLOCATOR_NONE) { |
| pthread_mutex_lock(&intel_bb_list_lock); |
| igt_list_add(&ibb->link, &intel_bb_list); |
| pthread_mutex_unlock(&intel_bb_list_lock); |
| } |
| |
| return ibb; |
| } |
| |
| /** |
| * intel_bb_create_full: |
| * @fd: drm fd - i915 or xe |
| * @ctx: for i915 context id, for xe engine id |
| * @vm: for xe vm_id, unused for i915 |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @size: size of the batchbuffer |
| * @start: allocator vm start address |
| * @end: allocator vm end address |
| * @allocator_type: allocator type, SIMPLE, RELOC, ... |
| * @strategy: allocation strategy |
| * |
| * Creates a bb with the context passed in @ctx, the size in @size and the |
| * allocator type in @allocator_type. Relocations are disabled because the |
| * IGT allocator is used in that case. The VM range (@start and @end) and |
| * the allocation @strategy (a hint to the allocator about address |
| * allocation preferences) are passed to the allocator. |
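| * |
| * A sketch restricting allocations to a hypothetical [64 KiB, 4 GiB) range: |
| * |
| * |[<!-- language="C" --> |
| * struct intel_bb *ibb; |
| * |
| * ibb = intel_bb_create_full(fd, 0, 0, NULL, 4096, 0x10000, 1ull << 32, |
| *                            INTEL_ALLOCATOR_SIMPLE, |
| *                            ALLOC_STRATEGY_HIGH_TO_LOW); |
| * ]| |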
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| struct intel_bb *intel_bb_create_full(int fd, uint32_t ctx, uint32_t vm, |
| const intel_ctx_cfg_t *cfg, uint32_t size, |
| uint64_t start, uint64_t end, |
| uint8_t allocator_type, |
| enum allocator_strategy strategy) |
| { |
| return __intel_bb_create(fd, ctx, vm, cfg, size, false, start, end, |
| allocator_type, strategy); |
| } |
| |
| /** |
| * intel_bb_create_with_allocator: |
| * @fd: drm fd - i915 or xe |
| * @ctx: for i915 context id, for xe engine id |
| * @vm: for xe vm_id, unused for i915 |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @size: size of the batchbuffer |
| * @allocator_type: allocator type, SIMPLE, RANDOM, ... |
| * |
| * Creates a bb with the context passed in @ctx, the size in @size and the |
| * allocator type in @allocator_type. Relocations are disabled because the |
| * IGT allocator is used in that case. |
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| struct intel_bb *intel_bb_create_with_allocator(int fd, uint32_t ctx, uint32_t vm, |
| const intel_ctx_cfg_t *cfg, |
| uint32_t size, |
| uint8_t allocator_type) |
| { |
| return __intel_bb_create(fd, ctx, vm, cfg, size, false, 0, 0, |
| allocator_type, ALLOC_STRATEGY_HIGH_TO_LOW); |
| } |
| |
| static bool aux_needs_softpin(int fd) |
| { |
| return intel_gen(intel_get_drm_devid(fd)) >= 12; |
| } |
| |
| static bool has_ctx_cfg(struct intel_bb *ibb) |
| { |
| return ibb->cfg && ibb->cfg->num_engines > 0; |
| } |
| |
| /** |
| * intel_bb_create: |
| * @fd: drm fd - i915 or xe |
| * @size: size of the batchbuffer |
| * |
| * Creates bb with default context. |
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| * |
| * Notes: |
| * |
| * intel_bb must not be created in igt_fixture. The reason is that intel_bb |
| * "opens" a connection to the allocator and, when the test completes, it |
| * can leave the allocator in an unknown state (mostly for failed tests). |
| * As igt_core is armed to reset the allocator infrastructure, the |
| * connection kept inside intel_bb is not valid anymore. |
| * Trying to use it leads to catastrophic errors. |
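| * |
| * A minimal lifecycle sketch (assuming an open i915 or xe fd and the |
| * usual emit/exec helpers from this library): |
| * |
| * |[<!-- language="C" --> |
| * struct intel_bb *ibb = intel_bb_create(fd, 4096); |
| * |
| * intel_bb_out(ibb, MI_NOOP); |
| * intel_bb_emit_bbe(ibb); |
| * intel_bb_exec(ibb, intel_bb_offset(ibb), I915_EXEC_DEFAULT, true); |
| * intel_bb_destroy(ibb); |
| * ]| |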
| */ |
| struct intel_bb *intel_bb_create(int fd, uint32_t size) |
| { |
| bool relocs = is_i915_device(fd) && gem_has_relocations(fd); |
| |
| return __intel_bb_create(fd, 0, 0, NULL, size, |
| relocs && !aux_needs_softpin(fd), 0, 0, |
| INTEL_ALLOCATOR_SIMPLE, |
| ALLOC_STRATEGY_HIGH_TO_LOW); |
| } |
| |
| /** |
| * intel_bb_create_with_context: |
| * @fd: drm fd - i915 or xe |
| * @ctx: for i915 context id, for xe engine id |
| * @vm: for xe vm_id, unused for i915 |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @size: size of the batchbuffer |
| * |
| * Creates a bb with the context passed in @ctx and the @cfg configuration |
| * (when working with a custom engine layout). |
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| struct intel_bb * |
| intel_bb_create_with_context(int fd, uint32_t ctx, uint32_t vm, |
| const intel_ctx_cfg_t *cfg, uint32_t size) |
| { |
| bool relocs = is_i915_device(fd) && gem_has_relocations(fd); |
| |
| return __intel_bb_create(fd, ctx, vm, cfg, size, |
| relocs && !aux_needs_softpin(fd), 0, 0, |
| INTEL_ALLOCATOR_SIMPLE, |
| ALLOC_STRATEGY_HIGH_TO_LOW); |
| } |
| |
| /** |
| * intel_bb_create_with_relocs: |
| * @fd: drm fd - i915 |
| * @size: size of the batchbuffer |
| * |
| * Creates a bb which disables passing addresses. |
| * This will lead to relocations when objects are not previously pinned. |
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| struct intel_bb *intel_bb_create_with_relocs(int fd, uint32_t size) |
| { |
| igt_require(is_i915_device(fd) && gem_has_relocations(fd)); |
| |
| return __intel_bb_create(fd, 0, 0, NULL, size, true, 0, 0, |
| INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE); |
| } |
| |
| /** |
| * intel_bb_create_with_relocs_and_context: |
| * @fd: drm fd - i915 |
| * @ctx: context |
| * @cfg: intel_ctx configuration, NULL for default context or legacy mode |
| * @size: size of the batchbuffer |
| * |
| * Creates a bb with the context passed in @ctx which disables passing |
| * addresses. This will lead to relocations when objects are not |
| * previously pinned. |
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| struct intel_bb * |
| intel_bb_create_with_relocs_and_context(int fd, uint32_t ctx, |
| const intel_ctx_cfg_t *cfg, |
| uint32_t size) |
| { |
| igt_require(is_i915_device(fd) && gem_has_relocations(fd)); |
| |
| return __intel_bb_create(fd, ctx, 0, cfg, size, true, 0, 0, |
| INTEL_ALLOCATOR_NONE, ALLOC_STRATEGY_NONE); |
| } |
| |
| /** |
| * intel_bb_create_no_relocs: |
| * @fd: drm fd |
| * @size: size of the batchbuffer |
| * |
| * Creates bb with disabled relocations. |
| * This enables passing addresses and requires pinning objects. |
| * |
| * Returns: |
| * |
| * Pointer to the intel_bb, asserts on failure. |
| */ |
| struct intel_bb *intel_bb_create_no_relocs(int fd, uint32_t size) |
| { |
| igt_require(gem_uses_full_ppgtt(fd)); |
| |
| return __intel_bb_create(fd, 0, 0, NULL, size, false, 0, 0, |
| INTEL_ALLOCATOR_SIMPLE, |
| ALLOC_STRATEGY_HIGH_TO_LOW); |
| } |
| |
| static void __intel_bb_destroy_relocations(struct intel_bb *ibb) |
| { |
| uint32_t i; |
| |
| /* Free relocations */ |
| for (i = 0; i < ibb->num_objects; i++) { |
| free(from_user_pointer(ibb->objects[i]->relocs_ptr)); |
| ibb->objects[i]->relocs_ptr = to_user_pointer(NULL); |
| ibb->objects[i]->relocation_count = 0; |
| } |
| |
| ibb->relocs = NULL; |
| ibb->num_relocs = 0; |
| ibb->allocated_relocs = 0; |
| } |
| |
| static void __intel_bb_destroy_objects(struct intel_bb *ibb) |
| { |
| free(ibb->objects); |
| ibb->objects = NULL; |
| |
| tdestroy(ibb->current, free); |
| ibb->current = NULL; |
| |
| ibb->num_objects = 0; |
| ibb->allocated_objects = 0; |
| } |
| |
| static void __intel_bb_destroy_cache(struct intel_bb *ibb) |
| { |
| tdestroy(ibb->root, free); |
| ibb->root = NULL; |
| } |
| |
| static void __intel_bb_remove_intel_bufs(struct intel_bb *ibb) |
| { |
| struct intel_buf *entry, *tmp; |
| |
| igt_list_for_each_entry_safe(entry, tmp, &ibb->intel_bufs, link) |
| intel_bb_remove_intel_buf(ibb, entry); |
| } |
| |
| /** |
| * intel_bb_destroy: |
| * @ibb: pointer to intel_bb |
| * |
| * Frees all relocations / objects allocated during filling the batch. |
| */ |
| void intel_bb_destroy(struct intel_bb *ibb) |
| { |
| igt_assert(ibb); |
| |
| ibb->refcount--; |
| igt_assert_f(ibb->refcount == 0, "Trying to destroy referenced bb!"); |
| |
| __intel_bb_remove_intel_bufs(ibb); |
| __intel_bb_destroy_relocations(ibb); |
| __intel_bb_destroy_objects(ibb); |
| __intel_bb_destroy_cache(ibb); |
| |
| if (ibb->allocator_type != INTEL_ALLOCATOR_NONE) { |
| if (intel_bb_do_tracking) { |
| pthread_mutex_lock(&intel_bb_list_lock); |
| igt_list_del(&ibb->link); |
| pthread_mutex_unlock(&intel_bb_list_lock); |
| } |
| |
| intel_allocator_free(ibb->allocator_handle, ibb->handle); |
| intel_allocator_close(ibb->allocator_handle); |
| } |
| gem_close(ibb->fd, ibb->handle); |
| |
| if (ibb->fence >= 0) |
| close(ibb->fence); |
| if (ibb->engine_syncobj) |
| syncobj_destroy(ibb->fd, ibb->engine_syncobj); |
| if (ibb->vm_id && !ibb->ctx) |
| xe_vm_destroy(ibb->fd, ibb->vm_id); |
| |
| free(ibb->batch); |
| free(ibb->cfg); |
| free(ibb); |
| } |
| |
| static struct drm_xe_vm_bind_op *xe_alloc_bind_ops(struct intel_bb *ibb, |
| uint32_t op, uint32_t region) |
| { |
| struct drm_i915_gem_exec_object2 **objects = ibb->objects; |
| struct drm_xe_vm_bind_op *bind_ops, *ops; |
| bool set_obj = (op & 0xffff) == XE_VM_BIND_OP_MAP; |
| |
| bind_ops = calloc(ibb->num_objects, sizeof(*bind_ops)); |
| igt_assert(bind_ops); |
| |
| igt_debug("bind_ops: %s\n", set_obj ? "MAP" : "UNMAP"); |
| for (int i = 0; i < ibb->num_objects; i++) { |
| ops = &bind_ops[i]; |
| |
| if (set_obj) |
| ops->obj = objects[i]->handle; |
| |
| ops->op = op; |
| ops->obj_offset = 0; |
| ops->addr = objects[i]->offset; |
| ops->range = objects[i]->rsvd1; |
| ops->region = region; |
| |
| igt_debug(" [%d]: handle: %u, offset: %llx, size: %llx\n", |
| i, ops->obj, (long long)ops->addr, (long long)ops->range); |
| } |
| |
| return bind_ops; |
| } |
| |
| static void __unbind_xe_objects(struct intel_bb *ibb) |
| { |
| struct drm_xe_sync syncs[2] = { |
| { .flags = DRM_XE_SYNC_SYNCOBJ }, |
| { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, }, |
| }; |
| int ret; |
| |
| syncs[0].handle = ibb->engine_syncobj; |
| syncs[1].handle = syncobj_create(ibb->fd, 0); |
| |
| if (ibb->num_objects > 1) { |
| struct drm_xe_vm_bind_op *bind_ops; |
| uint32_t op = XE_VM_BIND_OP_UNMAP | XE_VM_BIND_FLAG_ASYNC; |
| |
| bind_ops = xe_alloc_bind_ops(ibb, op, 0); |
| xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops, |
| ibb->num_objects, syncs, 2); |
| free(bind_ops); |
| } else { |
| igt_debug("bind: UNMAP\n"); |
| igt_debug(" offset: %llx, size: %llx\n", |
| (long long)ibb->batch_offset, (long long)ibb->size); |
| xe_vm_unbind_async(ibb->fd, ibb->vm_id, 0, 0, |
| ibb->batch_offset, ibb->size, syncs, 2); |
| } |
| ret = syncobj_wait_err(ibb->fd, &syncs[1].handle, 1, INT64_MAX, 0); |
| igt_assert_eq(ret, 0); |
| syncobj_destroy(ibb->fd, syncs[1].handle); |
| |
| ibb->xe_bound = false; |
| } |
| |
| /* |
| * intel_bb_reset: |
| * @ibb: pointer to intel_bb |
| * @purge_objects_cache: if true destroy internal execobj and relocs + cache |
| * |
| * Recreate batch bo when there's no additional reference. |
| * |
| * When @purge_objects_cache == true we destroy the cache as well as remove |
| * all intel_bufs from the intel-bb tracking list. Removing intel_bufs |
| * releases their addresses in the allocator. |
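| * |
| * A sketch of reusing one bb across submissions while keeping cached |
| * offsets: |
| * |
| * |[<!-- language="C" --> |
| * intel_bb_sync(ibb); |
| * intel_bb_reset(ibb, false); |
| * ]| |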
| */ |
| |
| void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache) |
| { |
| uint32_t i; |
| |
| if (purge_objects_cache && ibb->refcount > 1) |
| igt_warn("Cannot purge objects cache on bb, refcount > 1!"); |
| |
| /* Someone still holds a reference, just exit */ |
| if (ibb->refcount > 1) |
| return; |
| |
| /* |
| * To avoid relocations, objects previously pinned to high virtual |
| * addresses should keep the 48bit flag. Ensure we won't clear it |
| * in the reset path. |
| */ |
| for (i = 0; i < ibb->num_objects; i++) |
| ibb->objects[i]->flags &= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; |
| |
| if (ibb->driver == INTEL_DRIVER_XE && ibb->xe_bound) |
| __unbind_xe_objects(ibb); |
| |
| __intel_bb_destroy_relocations(ibb); |
| __intel_bb_destroy_objects(ibb); |
| __reallocate_objects(ibb); |
| |
| if (purge_objects_cache) { |
| __intel_bb_remove_intel_bufs(ibb); |
| __intel_bb_destroy_cache(ibb); |
| } |
| |
| /* |
| * When we use allocators we're in no-reloc mode, so we have to free |
| * and reacquire the offset (ibb->handle can change in a multiprocess |
| * environment). We also have to remove the object from, and re-add it |
| * to, the objects array and the cache tree. |
| */ |
| if (ibb->allocator_type != INTEL_ALLOCATOR_NONE && !purge_objects_cache) |
| intel_bb_remove_object(ibb, ibb->handle, ibb->batch_offset, |
| ibb->size); |
| |
| gem_close(ibb->fd, ibb->handle); |
| if (ibb->driver == INTEL_DRIVER_I915) |
| ibb->handle = gem_create(ibb->fd, ibb->size); |
| else |
| ibb->handle = xe_bo_create_flags(ibb->fd, 0, ibb->size, |
| visible_vram_if_possible(ibb->fd, 0)); |
| |
| /* Reacquire offset for RELOC and SIMPLE */ |
| if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE || |
| ibb->allocator_type == INTEL_ALLOCATOR_RELOC) |
| ibb->batch_offset = __intel_bb_get_offset(ibb, |
| ibb->handle, |
| ibb->size, |
| ibb->alignment); |
| |
| intel_bb_add_object(ibb, ibb->handle, ibb->size, |
| ibb->batch_offset, |
| ibb->alignment, false); |
| ibb->ptr = ibb->batch; |
| memset(ibb->batch, 0, ibb->size); |
| } |
| |
| /* |
| * intel_bb_sync: |
| * @ibb: pointer to intel_bb |
| * |
| * Waits for bb completion. Returns 0 on success, otherwise errno. |
| */ |
| int intel_bb_sync(struct intel_bb *ibb) |
| { |
| int ret; |
| |
| if (ibb->fence < 0 && !ibb->engine_syncobj) |
| return 0; |
| |
| if (ibb->fence >= 0) { |
| ret = sync_fence_wait(ibb->fence, -1); |
| if (ret == 0) { |
| close(ibb->fence); |
| ibb->fence = -1; |
| } |
| } else { |
| igt_assert_neq(ibb->engine_syncobj, 0); |
| ret = syncobj_wait_err(ibb->fd, &ibb->engine_syncobj, |
| 1, INT64_MAX, 0); |
| } |
| |
| return ret; |
| } |
| |
| /* |
| * intel_bb_print: |
| * @ibb: pointer to intel_bb |
| * |
| * Prints batch to stdout. |
| */ |
| void intel_bb_print(struct intel_bb *ibb) |
| { |
| igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n", |
| ibb->fd, ibb->gen, ibb->devid, ibb->debug); |
| igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n", |
| ibb->handle, ibb->size, ibb->batch, ibb->ptr); |
| igt_info("gtt_size: %" PRIu64 ", supports 48bit: %d\n", |
| ibb->gtt_size, ibb->supports_48b_address); |
| igt_info("ctx: %u\n", ibb->ctx); |
| igt_info("root: %p\n", ibb->root); |
| igt_info("objects: %p, num_objects: %u, allocated obj: %u\n", |
| ibb->objects, ibb->num_objects, ibb->allocated_objects); |
| igt_info("relocs: %p, num_relocs: %u, allocated_relocs: %u\n----\n", |
| ibb->relocs, ibb->num_relocs, ibb->allocated_relocs); |
| } |
| |
| /* |
| * intel_bb_dump: |
| * @ibb: pointer to intel_bb |
| * @filename: name of the file to write the bb to |
| * |
| * Dump batch bo to file. |
| */ |
| void intel_bb_dump(struct intel_bb *ibb, const char *filename) |
| { |
| FILE *out; |
| void *ptr; |
| |
| ptr = gem_mmap__device_coherent(ibb->fd, ibb->handle, 0, ibb->size, |
| PROT_READ); |
| out = fopen(filename, "wb"); |
| igt_assert(out); |
| fwrite(ptr, ibb->size, 1, out); |
| fclose(out); |
| munmap(ptr, ibb->size); |
| } |
| |
| /** |
| * intel_bb_set_debug: |
| * @ibb: pointer to intel_bb |
| * @debug: true / false |
| * |
| * Sets debug to true / false. Execbuf is then called synchronously and |
| * object/reloc arrays are printed after execution. |
| */ |
| void intel_bb_set_debug(struct intel_bb *ibb, bool debug) |
| { |
| ibb->debug = debug; |
| } |
| |
| /** |
| * intel_bb_set_dump_base64: |
| * @ibb: pointer to intel_bb |
| * @dump: true / false |
| * |
| * Dump the bb as a base64 string before the execbuf call. |
| */ |
| void intel_bb_set_dump_base64(struct intel_bb *ibb, bool dump) |
| { |
| ibb->dump_base64 = dump; |
| } |
| |
| static int __compare_objects(const void *p1, const void *p2) |
| { |
| const struct drm_i915_gem_exec_object2 *o1 = p1, *o2 = p2; |
| |
| return (int) ((int64_t) o1->handle - (int64_t) o2->handle); |
| } |
| |
| static struct drm_i915_gem_exec_object2 * |
| __add_to_cache(struct intel_bb *ibb, uint32_t handle) |
| { |
| struct drm_i915_gem_exec_object2 **found, *object; |
| |
| object = malloc(sizeof(*object)); |
| igt_assert(object); |
| |
| object->handle = handle; |
| object->alignment = 0; |
| found = tsearch((void *) object, &ibb->root, __compare_objects); |
| |
| if (*found == object) { |
| memset(object, 0, sizeof(*object)); |
| object->handle = handle; |
| object->offset = INTEL_BUF_INVALID_ADDRESS; |
| } else { |
| free(object); |
| object = *found; |
| } |
| |
| return object; |
| } |
| |
| static bool __remove_from_cache(struct intel_bb *ibb, uint32_t handle) |
| { |
| struct drm_i915_gem_exec_object2 **found, *object; |
| |
| object = intel_bb_find_object(ibb, handle); |
| if (!object) { |
| igt_warn("Object: handle: %u not found\n", handle); |
| return false; |
| } |
| |
| found = tdelete((void *) object, &ibb->root, __compare_objects); |
| if (!found) |
| return false; |
| |
| free(object); |
| |
| return true; |
| } |
| |
| static int __compare_handles(const void *p1, const void *p2) |
| { |
| return (int) (*(int32_t *) p1 - *(int32_t *) p2); |
| } |
| |
| static void __add_to_objects(struct intel_bb *ibb, |
| struct drm_i915_gem_exec_object2 *object) |
| { |
| uint32_t **found, *handle; |
| |
| handle = malloc(sizeof(*handle)); |
| igt_assert(handle); |
| |
| *handle = object->handle; |
| found = tsearch((void *) handle, &ibb->current, __compare_handles); |
| |
| if (*found == handle) { |
| __reallocate_objects(ibb); |
| igt_assert(ibb->num_objects < ibb->allocated_objects); |
| ibb->objects[ibb->num_objects++] = object; |
| } else { |
| free(handle); |
| } |
| } |
| |
| static void __remove_from_objects(struct intel_bb *ibb, |
| struct drm_i915_gem_exec_object2 *object) |
| { |
| uint32_t i, **handle, *to_free; |
| bool found = false; |
| |
| for (i = 0; i < ibb->num_objects; i++) { |
| if (ibb->objects[i] == object) { |
| found = true; |
| break; |
| } |
| } |
| |
| /* |
| * When we reset bb (without purging) we have: |
| * 1. cache which contains all cached objects |
| * 2. objects array which contains only bb object (cleared in reset |
| * path with bb object added at the end) |
| * So !found is a normal situation and no warning is emitted here. |
| */ |
| if (!found) |
| return; |
| |
| ibb->num_objects--; |
| if (i < ibb->num_objects) |
| memmove(&ibb->objects[i], &ibb->objects[i + 1], |
| sizeof(object) * (ibb->num_objects - i)); |
| |
| handle = tfind((void *) &object->handle, |
| &ibb->current, __compare_handles); |
| if (!handle) { |
| igt_warn("Object %u doesn't exist in the tree, can't remove", |
| object->handle); |
| return; |
| } |
| |
| to_free = *handle; |
| tdelete((void *) &object->handle, &ibb->current, __compare_handles); |
| free(to_free); |
| } |
| |
| /** |
| * __intel_bb_add_object: |
| * @ibb: pointer to intel_bb |
| * @handle: which handle to add to objects array |
| * @size: object size |
| * @offset: presumed offset of the object when no relocation is enforced |
| * @alignment: alignment of the object, if 0 it will be set to page size |
| * @write: whether the handle is a render target |
| * |
| * Adds or updates an execobj slot in the bb objects array and in the |
| * object tree. When the object is a render target it has to be marked |
| * with the EXEC_OBJECT_WRITE flag. |
| */ |
| static struct drm_i915_gem_exec_object2 * |
| __intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size, |
| uint64_t offset, uint64_t alignment, bool write) |
| { |
| struct drm_i915_gem_exec_object2 *object; |
| |
| igt_assert(INVALID_ADDR(offset) || alignment == 0 |
| || ALIGN(offset, alignment) == offset); |
| igt_assert(is_power_of_two(alignment)); |
| |
| if (ibb->driver == INTEL_DRIVER_I915) |
| alignment = max_t(uint64_t, alignment, gem_detect_safe_alignment(ibb->fd)); |
| else |
| alignment = max_t(uint64_t, ibb->alignment, alignment); |
| |
| object = __add_to_cache(ibb, handle); |
| __add_to_objects(ibb, object); |
| |
| /* |
| * If object->offset == INVALID_ADDRESS the object was freshly added to |
| * the cache. In that case we have two choices: |
| * a) get new offset (passed offset was invalid) |
| * b) use offset passed in the call (valid) |
| */ |
| if (INVALID_ADDR(object->offset)) { |
| if (INVALID_ADDR(offset)) { |
| offset = __intel_bb_get_offset(ibb, handle, size, |
| alignment); |
| } else { |
| offset = offset & (ibb->gtt_size - 1); |
| |
| /* |
| * For simple allocator check entry consistency |
| * - reserve if it is not already allocated. |
| */ |
| if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) { |
| bool allocated, reserved; |
| |
| reserved = intel_allocator_reserve_if_not_allocated(ibb->allocator_handle, |
| handle, size, offset, |
| &allocated); |
| igt_assert_f(allocated || reserved, |
| "Can't get offset, allocated: %d, reserved: %d\n", |
| allocated, reserved); |
| } |
| } |
| } else { |
| /* |
| * This assertion makes sense only when we have to be consistent |
| * with the underlying allocator. For relocations, and when !ppgtt, |
| * we can expect that addresses passed by the user may be moved |
| * within the driver. |
| */ |
| if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) |
| igt_assert_f(object->offset == offset, |
| "(pid: %ld) handle: %u, offset not match: %" PRIx64 " <> %" PRIx64 "\n", |
| (long) getpid(), handle, |
| (uint64_t) object->offset, |
| offset); |
| } |
| |
| object->offset = offset; |
| |
| if (write) |
| object->flags |= EXEC_OBJECT_WRITE; |
| |
| if (ibb->supports_48b_address) |
| object->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; |
| |
| if (ibb->uses_full_ppgtt && !ibb->enforce_relocs) |
| object->flags |= EXEC_OBJECT_PINNED; |
| |
| if (ibb->allows_obj_alignment) |
| object->alignment = alignment; |
| |
| if (ibb->driver == INTEL_DRIVER_XE) { |
| object->alignment = alignment; |
| object->rsvd1 = size; |
| } |
| |
| return object; |
| } |
| |
| struct drm_i915_gem_exec_object2 * |
| intel_bb_add_object(struct intel_bb *ibb, uint32_t handle, uint64_t size, |
| uint64_t offset, uint64_t alignment, bool write) |
| { |
| struct drm_i915_gem_exec_object2 *obj = NULL; |
| |
| obj = __intel_bb_add_object(ibb, handle, size, offset, |
| alignment, write); |
| igt_assert(obj); |
| |
| return obj; |
| } |
| |
| bool intel_bb_remove_object(struct intel_bb *ibb, uint32_t handle, |
| uint64_t offset, uint64_t size) |
| { |
| struct drm_i915_gem_exec_object2 *object; |
| bool is_reserved; |
| |
| object = intel_bb_find_object(ibb, handle); |
| if (!object) |
| return false; |
| |
| if (ibb->allocator_type != INTEL_ALLOCATOR_NONE) { |
| intel_allocator_free(ibb->allocator_handle, handle); |
| is_reserved = intel_allocator_is_reserved(ibb->allocator_handle, |
| size, offset); |
| if (is_reserved) |
| intel_allocator_unreserve(ibb->allocator_handle, handle, |
| size, offset); |
| } |
| |
| __remove_from_objects(ibb, object); |
| __remove_from_cache(ibb, handle); |
| |
| return true; |
| } |
| |
| static struct drm_i915_gem_exec_object2 * |
| __intel_bb_add_intel_buf(struct intel_bb *ibb, struct intel_buf *buf, |
| uint64_t alignment, bool write) |
| { |
| struct drm_i915_gem_exec_object2 *obj; |
| |
| igt_assert(ibb); |
| igt_assert(buf); |
| igt_assert(!buf->ibb || buf->ibb == ibb); |
| igt_assert(ALIGN(alignment, 4096) == alignment); |
| |
| if (!alignment) { |
| alignment = 0x1000; |
| |
| /* |
| * TODO: |
| * Find out why MTL needs special alignment; the spec says 32k |
| * is enough for MTL. |
| */ |
| if (ibb->gen >= 12 && buf->compression) |
| alignment = IS_METEORLAKE(ibb->devid) ? 0x100000 : 0x10000; |
| |
| /* For gen3 ensure tiled buffers are aligned to power of two size */ |
| if (ibb->gen == 3 && buf->tiling) { |
| alignment = 1024 * 1024; |
| |
| while (alignment < buf->surface[0].size) |
| alignment <<= 1; |
| } |
| } |
| |
| obj = intel_bb_add_object(ibb, buf->handle, intel_buf_bo_size(buf), |
| buf->addr.offset, alignment, write); |
| buf->addr.offset = obj->offset; |
| |
| if (igt_list_empty(&buf->link)) { |
| igt_list_add_tail(&buf->link, &ibb->intel_bufs); |
| buf->ibb = ibb; |
| } else { |
| igt_assert(buf->ibb == ibb); |
| } |
| |
| return obj; |
| } |
| |
| struct drm_i915_gem_exec_object2 * |
| intel_bb_add_intel_buf(struct intel_bb *ibb, struct intel_buf *buf, bool write) |
| { |
| return __intel_bb_add_intel_buf(ibb, buf, 0, write); |
| } |
| |
| struct drm_i915_gem_exec_object2 * |
| intel_bb_add_intel_buf_with_alignment(struct intel_bb *ibb, struct intel_buf *buf, |
| uint64_t alignment, bool write) |
| { |
| return __intel_bb_add_intel_buf(ibb, buf, alignment, write); |
| } |
| |
| bool intel_bb_remove_intel_buf(struct intel_bb *ibb, struct intel_buf *buf) |
| { |
| bool removed; |
| |
| igt_assert(ibb); |
| igt_assert(buf); |
| igt_assert(!buf->ibb || buf->ibb == ibb); |
| |
| if (igt_list_empty(&buf->link)) |
| return false; |
| |
| removed = intel_bb_remove_object(ibb, buf->handle, |
| buf->addr.offset, |
| intel_buf_bo_size(buf)); |
| if (removed) { |
| buf->addr.offset = INTEL_BUF_INVALID_ADDRESS; |
| buf->ibb = NULL; |
| igt_list_del_init(&buf->link); |
| } |
| |
| return removed; |
| } |
| |
| void intel_bb_print_intel_bufs(struct intel_bb *ibb) |
| { |
| struct intel_buf *entry; |
| |
| igt_list_for_each_entry(entry, &ibb->intel_bufs, link) { |
| igt_info("handle: %u, ibb: %p, offset: %lx\n", |
| entry->handle, entry->ibb, |
| (long) entry->addr.offset); |
| } |
| } |
| |
| struct drm_i915_gem_exec_object2 * |
| intel_bb_find_object(struct intel_bb *ibb, uint32_t handle) |
| { |
| struct drm_i915_gem_exec_object2 object = { .handle = handle }; |
| struct drm_i915_gem_exec_object2 **found; |
| |
| found = tfind((void *) &object, &ibb->root, __compare_objects); |
| if (!found) |
| return NULL; |
| |
| return *found; |
| } |
| |
| bool |
| intel_bb_object_set_flag(struct intel_bb *ibb, uint32_t handle, uint64_t flag) |
| { |
| struct drm_i915_gem_exec_object2 object = { .handle = handle }; |
| struct drm_i915_gem_exec_object2 **found; |
| |
| igt_assert_f(ibb->root, "Trying to search in null tree\n"); |
| |
| found = tfind((void *) &object, &ibb->root, __compare_objects); |
| if (!found) { |
| igt_warn("Trying to set fence on not found handle: %u\n", |
| handle); |
| return false; |
| } |
| |
| (*found)->flags |= flag; |
| |
| return true; |
| } |
| |
| bool |
| intel_bb_object_clear_flag(struct intel_bb *ibb, uint32_t handle, uint64_t flag) |
| { |
| struct drm_i915_gem_exec_object2 object = { .handle = handle }; |
| struct drm_i915_gem_exec_object2 **found; |
| |
| found = tfind((void *) &object, &ibb->root, __compare_objects); |
| if (!found) { |
| igt_warn("Trying to set fence on not found handle: %u\n", |
| handle); |
| return false; |
| } |
| |
| (*found)->flags &= ~flag; |
| |
| return true; |
| } |
| |
| /* |
| * intel_bb_add_reloc: |
| * @ibb: pointer to intel_bb |
| * @to_handle: object handle in which do relocation |
| * @handle: object handle which address will be taken to patch the @to_handle |
| * @read_domains: gem domain bits for the relocation |
| * @write_domain: gem domain bit for the relocation |
| * @delta: delta value to add to @buffer's gpu address |
| * @offset: offset within bb to be patched |
| * |
| * When relocations are requested function allocates additional relocation slot |
| * in reloc array for a handle. |
| * Object must be previously added to bb. |
| */ |
| static uint64_t intel_bb_add_reloc(struct intel_bb *ibb, |
| uint32_t to_handle, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint64_t delta, |
| uint64_t offset, |
| uint64_t presumed_offset) |
| { |
| struct drm_i915_gem_relocation_entry *relocs; |
| struct drm_i915_gem_exec_object2 *object, *to_object; |
| uint32_t i; |
| |
| object = intel_bb_find_object(ibb, handle); |
| igt_assert(object); |
| |
| /* In no-reloc mode we just return the previously assigned address */ |
| if (!ibb->enforce_relocs) |
| goto out; |
| |
| /* For ibb we have relocs allocated in chunks */ |
| if (to_handle == ibb->handle) { |
| relocs = ibb->relocs; |
| if (ibb->num_relocs == ibb->allocated_relocs) { |
| ibb->allocated_relocs += 4096 / sizeof(*relocs); |
| relocs = realloc(relocs, sizeof(*relocs) * ibb->allocated_relocs); |
| igt_assert(relocs); |
| ibb->relocs = relocs; |
| } |
| i = ibb->num_relocs++; |
| } else { |
| to_object = intel_bb_find_object(ibb, to_handle); |
| igt_assert_f(to_object, "object has to be added to ibb first!\n"); |
| |
		i = to_object->relocation_count++;
		relocs = from_user_pointer(to_object->relocs_ptr);
		relocs = realloc(relocs, sizeof(*relocs) * to_object->relocation_count);
		igt_assert(relocs);
		to_object->relocs_ptr = to_user_pointer(relocs);
| } |
| |
| memset(&relocs[i], 0, sizeof(*relocs)); |
| relocs[i].target_handle = handle; |
| relocs[i].read_domains = read_domains; |
| relocs[i].write_domain = write_domain; |
| relocs[i].delta = delta; |
| relocs[i].offset = offset; |
| if (ibb->enforce_relocs) |
| relocs[i].presumed_offset = -1; |
| else |
| relocs[i].presumed_offset = object->offset; |
| |
| igt_debug("add reloc: to_handle: %u, handle: %u, r/w: 0x%x/0x%x, " |
| "delta: 0x%" PRIx64 ", " |
| "offset: 0x%" PRIx64 ", " |
| "poffset: %p\n", |
| to_handle, handle, read_domains, write_domain, |
| delta, offset, |
| from_user_pointer(relocs[i].presumed_offset)); |
| |
| out: |
| return object->offset; |
| } |
| |
| static uint64_t __intel_bb_emit_reloc(struct intel_bb *ibb, |
| uint32_t to_handle, |
| uint32_t to_offset, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint64_t delta, |
| uint64_t presumed_offset) |
| { |
| uint64_t address; |
| |
| igt_assert(ibb); |
| |
| address = intel_bb_add_reloc(ibb, to_handle, handle, |
| read_domains, write_domain, |
| delta, to_offset, |
| presumed_offset); |
| |
| intel_bb_out(ibb, delta + address); |
| if (ibb->gen >= 8) |
| intel_bb_out(ibb, (delta + address) >> 32); |
| |
| return address; |
| } |
| |
| /** |
| * intel_bb_emit_reloc: |
| * @ibb: pointer to intel_bb |
| * @handle: object handle which address will be taken to patch the bb |
| * @read_domains: gem domain bits for the relocation |
| * @write_domain: gem domain bit for the relocation |
| * @delta: delta value to add to @buffer's gpu address |
| * @presumed_offset: address of the object in address space. If -1 is passed |
| * then final offset of the object will be randomized (for no-reloc bb) or |
| * 0 (for reloc bb, in that case reloc.presumed_offset will be -1). In |
| * case address is known it should passed in @presumed_offset (for no-reloc). |
| * @write: does a handle is a render target |
| * |
| * Function prepares relocation (execobj if required + reloc) and emits |
| * offset in bb. For I915_EXEC_NO_RELOC presumed_offset is a hint we already |
| * have object in valid place and relocation step can be skipped in this case. |
| * |
| * Note: delta is value added to address, mostly used when some instructions |
| * require modify-bit set to apply change. Which delta is valid depends |
| * on instruction (see instruction specification). |
| */ |
| uint64_t intel_bb_emit_reloc(struct intel_bb *ibb, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint64_t delta, |
| uint64_t presumed_offset) |
| { |
| igt_assert(ibb); |
| |
| return __intel_bb_emit_reloc(ibb, ibb->handle, intel_bb_offset(ibb), |
| handle, read_domains, write_domain, |
| delta, presumed_offset); |
| } |
| |
| uint64_t intel_bb_emit_reloc_fenced(struct intel_bb *ibb, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint64_t delta, |
| uint64_t presumed_offset) |
| { |
| uint64_t address; |
| |
| address = intel_bb_emit_reloc(ibb, handle, read_domains, write_domain, |
| delta, presumed_offset); |
| |
| intel_bb_object_set_flag(ibb, handle, EXEC_OBJECT_NEEDS_FENCE); |
| |
| return address; |
| } |
| |
| /** |
| * intel_bb_offset_reloc: |
| * @ibb: pointer to intel_bb |
| * @handle: object handle which address will be taken to patch the bb |
| * @read_domains: gem domain bits for the relocation |
| * @write_domain: gem domain bit for the relocation |
| * @offset: offset within bb to be patched |
| * @presumed_offset: address of the object in address space. If -1 is passed |
| * then final offset of the object will be randomized (for no-reloc bb) or |
| * 0 (for reloc bb, in that case reloc.presumed_offset will be -1). In |
| * case address is known it should passed in @presumed_offset (for no-reloc). |
| * |
| * Function prepares relocation (execobj if required + reloc). It it used |
| * for editing batchbuffer via modifying structures. It means when we're |
| * preparing batchbuffer it is more descriptive to edit the structure |
| * than emitting dwords. But it require for some fields to point the |
| * relocation. For that case @offset is passed by the user and it points |
| * to the offset in bb where the relocation will be applied. |
| */ |
| uint64_t intel_bb_offset_reloc(struct intel_bb *ibb, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint32_t offset, |
| uint64_t presumed_offset) |
| { |
| igt_assert(ibb); |
| |
| return intel_bb_add_reloc(ibb, ibb->handle, handle, |
| read_domains, write_domain, |
| 0, offset, presumed_offset); |
| } |
| |
| uint64_t intel_bb_offset_reloc_with_delta(struct intel_bb *ibb, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint32_t delta, |
| uint32_t offset, |
| uint64_t presumed_offset) |
| { |
| igt_assert(ibb); |
| |
| return intel_bb_add_reloc(ibb, ibb->handle, handle, |
| read_domains, write_domain, |
| delta, offset, presumed_offset); |
| } |
| |
| uint64_t intel_bb_offset_reloc_to_object(struct intel_bb *ibb, |
| uint32_t to_handle, |
| uint32_t handle, |
| uint32_t read_domains, |
| uint32_t write_domain, |
| uint32_t delta, |
| uint32_t offset, |
| uint64_t presumed_offset) |
| { |
| igt_assert(ibb); |
| |
| return intel_bb_add_reloc(ibb, to_handle, handle, |
| read_domains, write_domain, |
| delta, offset, presumed_offset); |
| } |
| |
| /* |
| * @intel_bb_set_pxp: |
| * @ibb: pointer to intel_bb |
| * @new_state: enable or disable pxp session |
| * @apptype: pxp session input identifies what type of session to enable |
| * @appid: pxp session input provides which appid to use |
| * |
| * This function merely stores the pxp state and session information to |
| * be retrieved and programmed later by supporting libraries such as |
| * gen12_render_copy that must program the HW within the same dispatch |
| */ |
| void intel_bb_set_pxp(struct intel_bb *ibb, bool new_state, |
| uint32_t apptype, uint32_t appid) |
| { |
| igt_assert(ibb); |
| |
| ibb->pxp.enabled = new_state; |
| ibb->pxp.apptype = new_state ? apptype : 0; |
| ibb->pxp.appid = new_state ? appid : 0; |
| } |
| |
| static void intel_bb_dump_execbuf(struct intel_bb *ibb, |
| struct drm_i915_gem_execbuffer2 *execbuf) |
| { |
| struct drm_i915_gem_exec_object2 *objects; |
| struct drm_i915_gem_relocation_entry *relocs, *reloc; |
| int i, j; |
| uint64_t address; |
| |
| igt_debug("execbuf [pid: %ld, fd: %d, ctx: %u]\n", |
| (long) getpid(), ibb->fd, ibb->ctx); |
| igt_debug("execbuf batch len: %u, start offset: 0x%x, " |
| "DR1: 0x%x, DR4: 0x%x, " |
| "num clip: %u, clipptr: 0x%llx, " |
| "flags: 0x%llx, rsvd1: 0x%llx, rsvd2: 0x%llx\n", |
| execbuf->batch_len, execbuf->batch_start_offset, |
| execbuf->DR1, execbuf->DR4, |
| execbuf->num_cliprects, execbuf->cliprects_ptr, |
| execbuf->flags, execbuf->rsvd1, execbuf->rsvd2); |
| |
| igt_debug("execbuf buffer_count: %d\n", execbuf->buffer_count); |
| for (i = 0; i < execbuf->buffer_count; i++) { |
| objects = &((struct drm_i915_gem_exec_object2 *) |
| from_user_pointer(execbuf->buffers_ptr))[i]; |
| relocs = from_user_pointer(objects->relocs_ptr); |
| address = objects->offset; |
| igt_debug(" [%d] handle: %u, reloc_count: %d, reloc_ptr: %p, " |
| "align: 0x%llx, offset: 0x%" PRIx64 ", flags: 0x%llx, " |
| "rsvd1: 0x%llx, rsvd2: 0x%llx\n", |
| i, objects->handle, objects->relocation_count, |
| relocs, |
| objects->alignment, |
| address, |
| objects->flags, |
| objects->rsvd1, objects->rsvd2); |
| if (objects->relocation_count) { |
| igt_debug("\texecbuf relocs:\n"); |
| for (j = 0; j < objects->relocation_count; j++) { |
| reloc = &relocs[j]; |
| address = reloc->presumed_offset; |
| igt_debug("\t [%d] target handle: %u, " |
| "offset: 0x%llx, delta: 0x%x, " |
| "presumed_offset: 0x%" PRIx64 ", " |
| "read_domains: 0x%x, " |
| "write_domain: 0x%x\n", |
| j, reloc->target_handle, |
| reloc->offset, reloc->delta, |
| address, |
| reloc->read_domains, |
| reloc->write_domain); |
| } |
| } |
| } |
| } |
| |
| static void intel_bb_dump_base64(struct intel_bb *ibb, int linelen) |
| { |
| int outsize; |
| gchar *str, *pos; |
| |
| igt_info("--- bb ---\n"); |
| pos = str = g_base64_encode((const guchar *) ibb->batch, ibb->size); |
| outsize = strlen(str); |
| |
| while (outsize > 0) { |
| igt_info("%.*s\n", min(outsize, linelen), pos); |
| pos += linelen; |
| outsize -= linelen; |
| } |
| |
	g_free(str);
| } |
| |
| static void print_node(const void *node, VISIT which, int depth) |
| { |
| const struct drm_i915_gem_exec_object2 *object = |
| *(const struct drm_i915_gem_exec_object2 **) node; |
| (void) depth; |
| |
| switch (which) { |
| case preorder: |
| case endorder: |
| break; |
| |
| case postorder: |
| case leaf: |
| igt_info("\t handle: %u, offset: 0x%" PRIx64 "\n", |
| object->handle, (uint64_t) object->offset); |
| break; |
| } |
| } |
| |
| void intel_bb_dump_cache(struct intel_bb *ibb) |
| { |
| igt_info("[pid: %ld] dump cache\n", (long) getpid()); |
| twalk(ibb->root, print_node); |
| } |
| |
| static struct drm_i915_gem_exec_object2 * |
| create_objects_array(struct intel_bb *ibb) |
| { |
| struct drm_i915_gem_exec_object2 *objects; |
| uint32_t i; |
| |
| objects = malloc(sizeof(*objects) * ibb->num_objects); |
| igt_assert(objects); |
| |
| for (i = 0; i < ibb->num_objects; i++) { |
| objects[i] = *(ibb->objects[i]); |
| objects[i].offset = CANONICAL(objects[i].offset); |
| } |
| |
| return objects; |
| } |
| |
| static void update_offsets(struct intel_bb *ibb, |
| struct drm_i915_gem_exec_object2 *objects) |
| { |
| struct drm_i915_gem_exec_object2 *object; |
| struct intel_buf *entry; |
| uint32_t i; |
| |
| for (i = 0; i < ibb->num_objects; i++) { |
| object = intel_bb_find_object(ibb, objects[i].handle); |
| igt_assert(object); |
| |
| object->offset = DECANONICAL(objects[i].offset); |
| |
| if (i == 0) |
| ibb->batch_offset = object->offset; |
| } |
| |
| igt_list_for_each_entry(entry, &ibb->intel_bufs, link) { |
| object = intel_bb_find_object(ibb, entry->handle); |
| igt_assert(object); |
| |
| if (ibb->allocator_type == INTEL_ALLOCATOR_SIMPLE) |
| igt_assert(object->offset == entry->addr.offset); |
| else |
| entry->addr.offset = object->offset; |
| |
| entry->addr.ctx = ibb->ctx; |
| } |
| } |
| |
| #define LINELEN 76 |
| |
| static int |
| __xe_bb_exec(struct intel_bb *ibb, uint64_t flags, bool sync) |
| { |
| uint32_t engine = flags & (I915_EXEC_BSD_MASK | I915_EXEC_RING_MASK); |
| uint32_t engine_id; |
| struct drm_xe_sync syncs[2] = { |
| { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, }, |
| { .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, }, |
| }; |
| struct drm_xe_vm_bind_op *bind_ops; |
| void *map; |
| |
| igt_assert_eq(ibb->num_relocs, 0); |
| igt_assert_eq(ibb->xe_bound, false); |
| |
| if (ibb->ctx) { |
| engine_id = ibb->ctx; |
| } else if (ibb->last_engine != engine) { |
| struct drm_xe_engine_class_instance inst = { }; |
| |
| inst.engine_instance = |
| (flags & I915_EXEC_BSD_MASK) >> I915_EXEC_BSD_SHIFT; |
| |
| switch (flags & I915_EXEC_RING_MASK) { |
| case I915_EXEC_DEFAULT: |
| case I915_EXEC_BLT: |
| inst.engine_class = DRM_XE_ENGINE_CLASS_COPY; |
| break; |
| case I915_EXEC_BSD: |
| inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_DECODE; |
| break; |
| case I915_EXEC_RENDER: |
| if (IS_PONTEVECCHIO(xe_dev_id(ibb->fd))) |
| inst.engine_class = DRM_XE_ENGINE_CLASS_COMPUTE; |
| else |
| inst.engine_class = DRM_XE_ENGINE_CLASS_RENDER; |
| break; |
| case I915_EXEC_VEBOX: |
| inst.engine_class = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE; |
| break; |
| default: |
			igt_assert_f(false, "Unknown engine: %x\n", (uint32_t) flags);
| } |
| igt_debug("Run on %s\n", xe_engine_class_string(inst.engine_class)); |
| |
| if (ibb->engine_id) |
| xe_exec_queue_destroy(ibb->fd, ibb->engine_id); |
| |
| ibb->engine_id = engine_id = |
| xe_exec_queue_create(ibb->fd, ibb->vm_id, &inst, 0); |
| } else { |
| engine_id = ibb->engine_id; |
| } |
| ibb->last_engine = engine; |
| |
| map = xe_bo_map(ibb->fd, ibb->handle, ibb->size); |
| memcpy(map, ibb->batch, ibb->size); |
| gem_munmap(map, ibb->size); |
| |
| syncs[0].handle = syncobj_create(ibb->fd, 0); |
| if (ibb->num_objects > 1) { |
| bind_ops = xe_alloc_bind_ops(ibb, XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC, 0); |
| xe_vm_bind_array(ibb->fd, ibb->vm_id, 0, bind_ops, |
| ibb->num_objects, syncs, 1); |
| free(bind_ops); |
| } else { |
| igt_debug("bind: MAP\n"); |
| igt_debug(" handle: %u, offset: %llx, size: %llx\n", |
| ibb->handle, (long long)ibb->batch_offset, |
| (long long)ibb->size); |
| xe_vm_bind_async(ibb->fd, ibb->vm_id, 0, ibb->handle, 0, |
| ibb->batch_offset, ibb->size, syncs, 1); |
| } |
| ibb->xe_bound = true; |
| |
| syncs[0].flags &= ~DRM_XE_SYNC_SIGNAL; |
| ibb->engine_syncobj = syncobj_create(ibb->fd, 0); |
| syncs[1].handle = ibb->engine_syncobj; |
| |
| xe_exec_sync(ibb->fd, engine_id, ibb->batch_offset, syncs, 2); |
| |
| if (sync) |
| intel_bb_sync(ibb); |
| |
| return 0; |
| } |
| |
| /* |
| * __intel_bb_exec: |
| * @ibb: pointer to intel_bb |
| * @end_offset: offset of the last instruction in the bb |
| * @flags: flags passed directly to execbuf |
 * @sync: if true wait for execbuf completion, otherwise the caller is
 * responsible for waiting for completion
| * |
| * Returns: 0 on success, otherwise errno. |
| * |
 * Note: in this step the execobj for the bb is allocated and inserted
 * into the objects array.
| */ |
| int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset, |
| uint64_t flags, bool sync) |
| { |
| struct drm_i915_gem_execbuffer2 execbuf; |
| struct drm_i915_gem_exec_object2 *objects; |
| int ret, fence, new_fence; |
| |
| ibb->objects[0]->relocs_ptr = to_user_pointer(ibb->relocs); |
| ibb->objects[0]->relocation_count = ibb->num_relocs; |
| ibb->objects[0]->handle = ibb->handle; |
| ibb->objects[0]->offset = ibb->batch_offset; |
| |
| gem_write(ibb->fd, ibb->handle, 0, ibb->batch, ibb->size); |
| |
| memset(&execbuf, 0, sizeof(execbuf)); |
| objects = create_objects_array(ibb); |
| execbuf.buffers_ptr = to_user_pointer(objects); |
| execbuf.buffer_count = ibb->num_objects; |
| execbuf.batch_len = end_offset; |
| execbuf.rsvd1 = ibb->ctx; |
| execbuf.flags = flags | I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_OUT; |
| if (ibb->enforce_relocs) |
| execbuf.flags &= ~I915_EXEC_NO_RELOC; |
| execbuf.rsvd2 = 0; |
| |
| if (ibb->dump_base64) |
| intel_bb_dump_base64(ibb, LINELEN); |
| |
| ret = __gem_execbuf_wr(ibb->fd, &execbuf); |
| if (ret) { |
| intel_bb_dump_execbuf(ibb, &execbuf); |
| free(objects); |
| return ret; |
| } |
| |
| /* Update addresses in the cache */ |
| update_offsets(ibb, objects); |
| |
| /* Save/merge fences */ |
| fence = execbuf.rsvd2 >> 32; |
| |
| if (ibb->fence < 0) { |
| ibb->fence = fence; |
| } else { |
| new_fence = sync_fence_merge(ibb->fence, fence); |
| close(ibb->fence); |
| close(fence); |
| ibb->fence = new_fence; |
| } |
| |
| if (sync || ibb->debug) |
| igt_assert(intel_bb_sync(ibb) == 0); |
| |
| if (ibb->debug) { |
| intel_bb_dump_execbuf(ibb, &execbuf); |
| if (intel_bb_debug_tree) { |
| igt_info("\nTree:\n"); |
| twalk(ibb->root, print_node); |
| } |
| } |
| |
| free(objects); |
| |
| return 0; |
| } |
| |
| /** |
| * intel_bb_exec: |
| * @ibb: pointer to intel_bb |
| * @end_offset: offset of the last instruction in the bb (for i915) |
| * @flags: flags passed directly to execbuf |
 * @sync: if true wait for execbuf completion, otherwise the caller is
 * responsible for waiting for completion
| * |
| * Do execbuf on context selected during bb creation. Asserts on failure. |
| */ |
| void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset, |
| uint64_t flags, bool sync) |
| { |
| if (ibb->dump_base64) |
| intel_bb_dump_base64(ibb, LINELEN); |
| |
| if (ibb->driver == INTEL_DRIVER_I915) |
| igt_assert_eq(__intel_bb_exec(ibb, end_offset, flags, sync), 0); |
| else |
| igt_assert_eq(__xe_bb_exec(ibb, flags, sync), 0); |
| } |
| |
| /** |
| * intel_bb_get_object_address: |
| * @ibb: pointer to intel_bb |
| * @handle: object handle |
| * |
| * When objects addresses are previously pinned and we don't want to relocate |
| * we need to acquire them from previous execbuf. Function returns previous |
| * object offset for @handle or 0 if object is not found. |
| */ |
| uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle) |
| { |
| struct drm_i915_gem_exec_object2 object = { .handle = handle }; |
| struct drm_i915_gem_exec_object2 **found; |
| |
| igt_assert(ibb); |
| |
| found = tfind((void *)&object, &ibb->root, __compare_objects); |
| if (!found) |
| return INTEL_BUF_INVALID_ADDRESS; |
| |
| return (*found)->offset; |
| } |
| |
| /* |
| * intel_bb_emit_bbe: |
| * @ibb: batchbuffer |
| * |
| * Outputs MI_BATCH_BUFFER_END and ensures batch is properly aligned. |
| */ |
| uint32_t intel_bb_emit_bbe(struct intel_bb *ibb) |
| { |
| /* Mark the end of the buffer. */ |
| intel_bb_out(ibb, MI_BATCH_BUFFER_END); |
| intel_bb_ptr_align(ibb, 8); |
| |
| return intel_bb_offset(ibb); |
| } |
| |
| /* |
| * intel_bb_emit_flush_common: |
| * @ibb: batchbuffer |
| * |
| * Emits instructions which completes batch buffer. |
| * |
| * Returns: offset in batch buffer where there's end of instructions. |
| */ |
| uint32_t intel_bb_emit_flush_common(struct intel_bb *ibb) |
| { |
| if (intel_bb_offset(ibb) == 0) |
| return 0; |
| |
| if (ibb->gen == 5) { |
| /* |
| * emit gen5 w/a without batch space checks - we reserve that |
| * already. |
| */ |
| intel_bb_out(ibb, CMD_POLY_STIPPLE_OFFSET << 16); |
| intel_bb_out(ibb, 0); |
| } |
| |
| /* Round batchbuffer usage to 2 DWORDs. */ |
| if ((intel_bb_offset(ibb) & 4) == 0) |
| intel_bb_out(ibb, 0); |
| |
| intel_bb_emit_bbe(ibb); |
| |
| return intel_bb_offset(ibb); |
| } |
| |
static void intel_bb_exec_with_ring(struct intel_bb *ibb, uint32_t ring)
| { |
| intel_bb_exec(ibb, intel_bb_offset(ibb), |
| ring | I915_EXEC_NO_RELOC, false); |
| intel_bb_reset(ibb, false); |
| } |
| |
| /* |
| * intel_bb_flush: |
| * @ibb: batchbuffer |
 * @ring: ring flags selecting the engine for execbuf
 *
 * If the batch is not empty, emit the batch buffer end, execute on @ring,
 * then reset the batch.
| */ |
| void intel_bb_flush(struct intel_bb *ibb, uint32_t ring) |
| { |
| if (intel_bb_emit_flush_common(ibb) == 0) |
| return; |
| |
| intel_bb_exec_with_ring(ibb, ring); |
| } |
| |
| /* |
| * intel_bb_flush_render: |
| * @ibb: batchbuffer |
| * |
 * If the batch is not empty, emit the batch buffer end, find the render
 * engine id, execute on the ring and reset the batch. The context used to
 * execute is the batch context.
| */ |
| void intel_bb_flush_render(struct intel_bb *ibb) |
| { |
| uint32_t ring; |
| |
| if (intel_bb_emit_flush_common(ibb) == 0) |
| return; |
| |
| if (has_ctx_cfg(ibb)) |
| ring = find_engine(ibb->cfg, I915_ENGINE_CLASS_RENDER); |
| else |
| ring = I915_EXEC_RENDER; |
| |
| intel_bb_exec_with_ring(ibb, ring); |
| } |
| |
| /* |
| * intel_bb_flush_blit: |
| * @ibb: batchbuffer |
| * |
 * If the batch is not empty, emit the batch buffer end, find a suitable
 * ring (depending on gen and context configuration), execute on it and
 * reset the batch. The context used to execute is the batch context.
| */ |
| void intel_bb_flush_blit(struct intel_bb *ibb) |
| { |
| uint32_t ring; |
| |
| if (intel_bb_emit_flush_common(ibb) == 0) |
| return; |
| |
| if (has_ctx_cfg(ibb)) |
| ring = find_engine(ibb->cfg, I915_ENGINE_CLASS_COPY); |
| else |
| ring = HAS_BLT_RING(ibb->devid) ? I915_EXEC_BLT : I915_EXEC_DEFAULT; |
| |
| intel_bb_exec_with_ring(ibb, ring); |
| } |
| |
| /* |
| * intel_bb_copy_data: |
| * @ibb: batchbuffer |
| * @data: pointer of data which should be copied into batch |
| * @bytes: number of bytes to copy, must be dword multiplied |
| * @align: alignment in the batch |
| * |
| * Function copies @bytes of data pointed by @data into batch buffer. |
| */ |
| uint32_t intel_bb_copy_data(struct intel_bb *ibb, |
| const void *data, unsigned int bytes, |
| uint32_t align) |
| { |
| uint32_t *subdata, offset; |
| |
| igt_assert((bytes & 3) == 0); |
| |
| intel_bb_ptr_align(ibb, align); |
| offset = intel_bb_offset(ibb); |
| igt_assert(offset + bytes < ibb->size); |
| |
| subdata = intel_bb_ptr(ibb); |
| memcpy(subdata, data, bytes); |
| intel_bb_ptr_add(ibb, bytes); |
| |
| return offset; |
| } |
| |
| /* |
| * intel_bb_blit_start: |
| * @ibb: batchbuffer |
| * @flags: flags to blit command |
| * |
| * Function emits XY_SRC_COPY_BLT instruction with size appropriate size |
| * which depend on gen. |
| */ |
| void intel_bb_blit_start(struct intel_bb *ibb, uint32_t flags) |
| { |
| if (blt_has_xy_src_copy(ibb->fd)) |
| intel_bb_out(ibb, XY_SRC_COPY_BLT_CMD | |
| XY_SRC_COPY_BLT_WRITE_ALPHA | |
| XY_SRC_COPY_BLT_WRITE_RGB | |
| flags | |
| (6 + 2 * (ibb->gen >= 8))); |
| else if (blt_has_fast_copy(ibb->fd)) |
| intel_bb_out(ibb, XY_FAST_COPY_BLT | flags); |
| else |
| igt_assert_f(0, "No supported blit command found\n"); |
| } |
| |
| /* |
| * intel_bb_emit_blt_copy: |
| * @ibb: batchbuffer |
| * @src: source buffer (intel_buf) |
| * @src_x1: source x1 position |
| * @src_y1: source y1 position |
| * @src_pitch: source pitch |
| * @dst: destination buffer (intel_buf) |
| * @dst_x1: destination x1 position |
| * @dst_y1: destination y1 position |
| * @dst_pitch: destination pitch |
| * @width: width of data to copy |
| * @height: height of data to copy |
| * |
| * Function emits complete blit command. |
| */ |
| void intel_bb_emit_blt_copy(struct intel_bb *ibb, |
| struct intel_buf *src, |
| int src_x1, int src_y1, int src_pitch, |
| struct intel_buf *dst, |
| int dst_x1, int dst_y1, int dst_pitch, |
| int width, int height, int bpp) |
| { |
| const unsigned int gen = ibb->gen; |
| uint32_t cmd_bits = 0; |
| uint32_t br13_bits; |
| uint32_t mask; |
| |
| igt_assert(bpp*(src_x1 + width) <= 8*src_pitch); |
| igt_assert(bpp*(dst_x1 + width) <= 8*dst_pitch); |
| igt_assert(src_pitch * (src_y1 + height) <= src->surface[0].size); |
| igt_assert(dst_pitch * (dst_y1 + height) <= dst->surface[0].size); |
| |
| if (gen >= 4 && src->tiling != I915_TILING_NONE) { |
| src_pitch /= 4; |
| if (blt_has_xy_src_copy(ibb->fd)) |
| cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED; |
| else if (blt_has_fast_copy(ibb->fd)) |
| cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling); |
| else |
| igt_assert_f(0, "No supported blit command found\n"); |
| } |
| |
| if (gen >= 4 && dst->tiling != I915_TILING_NONE) { |
| dst_pitch /= 4; |
| if (blt_has_xy_src_copy(ibb->fd)) |
| cmd_bits |= XY_SRC_COPY_BLT_DST_TILED; |
| else |
| cmd_bits |= fast_copy_dword0(src->tiling, dst->tiling); |
| } |
| |
| CHECK_RANGE(src_x1); CHECK_RANGE(src_y1); |
| CHECK_RANGE(dst_x1); CHECK_RANGE(dst_y1); |
| CHECK_RANGE(width); CHECK_RANGE(height); |
| CHECK_RANGE(src_x1 + width); CHECK_RANGE(src_y1 + height); |
| CHECK_RANGE(dst_x1 + width); CHECK_RANGE(dst_y1 + height); |
| CHECK_RANGE(src_pitch); CHECK_RANGE(dst_pitch); |
| |
| br13_bits = 0; |
| if (blt_has_xy_src_copy(ibb->fd)) { |
| switch (bpp) { |
| case 8: |
| break; |
| case 16: /* supporting only RGB565, not ARGB1555 */ |
| br13_bits |= 1 << 24; |
| break; |
| case 32: |
| br13_bits |= 3 << 24; |
| cmd_bits |= (XY_SRC_COPY_BLT_WRITE_ALPHA | |
| XY_SRC_COPY_BLT_WRITE_RGB); |
| break; |
| default: |
| igt_assert_f(0, "Unsupported pixel depth\n"); |
| } |
| } else { |
| br13_bits = fast_copy_dword1(ibb->fd, src->tiling, dst->tiling, bpp); |
| } |
| |
| if ((src->tiling | dst->tiling) >= I915_TILING_Y) { |
| intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1)); |
| intel_bb_out(ibb, BCS_SWCTRL); |
| |
| mask = (BCS_SRC_Y | BCS_DST_Y) << 16; |
| if (src->tiling == I915_TILING_Y) |
| mask |= BCS_SRC_Y; |
| if (dst->tiling == I915_TILING_Y) |
| mask |= BCS_DST_Y; |
| intel_bb_out(ibb, mask); |
| } |
| |
| intel_bb_add_intel_buf(ibb, src, false); |
| intel_bb_add_intel_buf(ibb, dst, true); |
| |
| intel_bb_blit_start(ibb, cmd_bits); |
| intel_bb_out(ibb, (br13_bits) | |
| (0xcc << 16) | /* copy ROP */ |
| dst_pitch); |
| intel_bb_out(ibb, (dst_y1 << 16) | dst_x1); /* dst x1,y1 */ |
| intel_bb_out(ibb, ((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */ |
| intel_bb_emit_reloc_fenced(ibb, dst->handle, |
| I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, |
| 0, dst->addr.offset); |
| intel_bb_out(ibb, (src_y1 << 16) | src_x1); /* src x1,y1 */ |
| intel_bb_out(ibb, src_pitch); |
| intel_bb_emit_reloc_fenced(ibb, src->handle, |
| I915_GEM_DOMAIN_RENDER, 0, |
| 0, src->addr.offset); |
| |
| if (gen >= 6 && src->handle == dst->handle) { |
| intel_bb_out(ibb, XY_SETUP_CLIP_BLT_CMD); |
| intel_bb_out(ibb, 0); |
| intel_bb_out(ibb, 0); |
| } |
| |
| if ((src->tiling | dst->tiling) >= I915_TILING_Y) { |
| igt_assert(ibb->gen >= 6); |
| intel_bb_out(ibb, MI_FLUSH_DW_CMD | 2); |
| intel_bb_out(ibb, 0); |
| intel_bb_out(ibb, 0); |
| intel_bb_out(ibb, 0); |
| |
| intel_bb_out(ibb, MI_LOAD_REGISTER_IMM(1)); |
| intel_bb_out(ibb, BCS_SWCTRL); |
| intel_bb_out(ibb, (BCS_SRC_Y | BCS_DST_Y) << 16); |
| } |
| } |
| |
| void intel_bb_blt_copy(struct intel_bb *ibb, |
| struct intel_buf *src, |
| int src_x1, int src_y1, int src_pitch, |
| struct intel_buf *dst, |
| int dst_x1, int dst_y1, int dst_pitch, |
| int width, int height, int bpp) |
| { |
| intel_bb_emit_blt_copy(ibb, src, src_x1, src_y1, src_pitch, |
| dst, dst_x1, dst_y1, dst_pitch, |
| width, height, bpp); |
| intel_bb_flush_blit(ibb); |
| } |
| |
| /** |
| * intel_bb_copy_intel_buf: |
| * @batch: batchbuffer object |
| * @src: source buffer (intel_buf) |
| * @dst: destination libdrm buffer object |
| * @size: size of the copy range in bytes |
| * |
| * Emits a copy operation using blitter commands into the supplied batch. |
| * A total of @size bytes from the start of @src is copied |
| * over to @dst. Note that @size must be page-aligned. |
| */ |
| void intel_bb_copy_intel_buf(struct intel_bb *ibb, |
| struct intel_buf *src, struct intel_buf *dst, |
| long int size) |
| { |
| igt_assert(size % 4096 == 0); |
| |
| intel_bb_blt_copy(ibb, |
| src, 0, 0, 4096, |
| dst, 0, 0, 4096, |
| 4096/4, size/4096, 32); |
| } |
| |
| /** |
| * igt_get_huc_copyfunc: |
| * @devid: pci device id |
| * |
| * Returns: |
| * |
| * The platform-specific huc copy function pointer for the device specified |
 * with @devid. Will return NULL when no huc copy function is implemented.
| */ |
| igt_huc_copyfunc_t igt_get_huc_copyfunc(int devid) |
| { |
| igt_huc_copyfunc_t copy = NULL; |
| |
| if (IS_GEN12(devid) || IS_GEN11(devid) || IS_GEN9(devid)) |
| copy = gen9_huc_copyfunc; |
| |
| return copy; |
| } |
| |
| /** |
| * intel_bb_track: |
| * @do_tracking: bool |
| * |
| * Turn on (true) or off (false) tracking for intel_batchbuffers. |
| */ |
| void intel_bb_track(bool do_tracking) |
| { |
| if (intel_bb_do_tracking == do_tracking) |
| return; |
| |
| if (intel_bb_do_tracking) { |
| struct intel_bb *entry, *tmp; |
| |
| pthread_mutex_lock(&intel_bb_list_lock); |
| igt_list_for_each_entry_safe(entry, tmp, &intel_bb_list, link) |
| igt_list_del(&entry->link); |
| pthread_mutex_unlock(&intel_bb_list_lock); |
| } |
| |
| intel_bb_do_tracking = do_tracking; |
| } |
| |
| static void __intel_bb_reinit_alloc(struct intel_bb *ibb) |
| { |
| if (ibb->allocator_type == INTEL_ALLOCATOR_NONE) |
| return; |
| |
| ibb->allocator_handle = intel_allocator_open_full(ibb->fd, ibb->ctx, |
| ibb->allocator_start, ibb->allocator_end, |
| ibb->allocator_type, |
| ibb->allocator_strategy, |
| ibb->alignment); |
| |
| intel_bb_reset(ibb, true); |
| } |
| |
| /** |
| * intel_bb_reinit_allocator: |
| * |
| * Reinit allocator and get offsets in tracked intel_batchbuffers. |
| */ |
| void intel_bb_reinit_allocator(void) |
| { |
| struct intel_bb *iter; |
| |
| if (!intel_bb_do_tracking) |
| return; |
| |
| pthread_mutex_lock(&intel_bb_list_lock); |
| igt_list_for_each_entry(iter, &intel_bb_list, link) |
| __intel_bb_reinit_alloc(iter); |
| pthread_mutex_unlock(&intel_bb_list_lock); |
| } |