| #include <stdbool.h> |
| #include <stdint.h> |
| |
| #include "drmtest.h" |
| #include "intel_aux_pgtable.h" |
| #include "intel_batchbuffer.h" |
| #include "intel_bufops.h" |
| #include "intel_chipset.h" |
| #include "ioctl_wrappers.h" |
| |
| #include "i915/gem_mman.h" |
| |
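| /* Mask of the inclusive bit range [e:s], e.g. BITMASK(7, 4) == 0xf0. */ |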
| #define BITMASK(e, s) ((~0ULL << (s)) & \ |
| (~0ULL >> (BITS_PER_LONG_LONG - 1 - (e)))) |
| |
| #define GFX_ADDRESS_BITS 48 |
| |
| #define AUX_FORMAT_YCRCB 0x03 |
| #define AUX_FORMAT_P010 0x07 |
| #define AUX_FORMAT_P016 0x08 |
| #define AUX_FORMAT_AYUV 0x09 |
| #define AUX_FORMAT_ARGB_8B 0x0A |
| #define AUX_FORMAT_NV12_21 0x0F |
| |
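| /* |
| * Description of one level of the AUX page table: |
| * @idx_shift/@idx_bits: the GFX address bits that index a table at this level |
| * @entry_ptr_shift: alignment (lowest valid bit) of the pointer stored in an |
| * entry at this level (child table or AUX CCS block address) |
| * @table_size: size in bytes of one table at this level |
| */ |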
| struct pgtable_level_desc { |
| int idx_shift; |
| int idx_bits; |
| int entry_ptr_shift; |
| int table_size; |
| }; |
| |
| struct pgtable_level_info { |
| const struct pgtable_level_desc *desc; |
| int table_count; |
| int alloc_base; |
| int alloc_ptr; |
| }; |
| |
| struct pgtable { |
| int levels; |
| struct pgtable_level_info *level_info; |
| int size; |
| int max_align; |
| struct intel_bb *ibb; |
| struct intel_buf *buf; |
| void *ptr; |
| }; |
| |
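| /* |
| * Return the offset just past the end of the last main surface in @buf; for |
| * YUV semi-planar formats both the Y and CbCr planes are taken into account. |
| */ |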
| static uint64_t last_buf_surface_end(struct intel_buf *buf) |
| { |
| uint64_t end_offset = 0; |
| int num_surfaces = buf->format_is_yuv_semiplanar ? 2 : 1; |
| int i; |
| |
| for (i = 0; i < num_surfaces; i++) { |
| uint64_t surface_end = buf->surface[i].offset + |
| buf->surface[i].size; |
| |
| if (surface_end > end_offset) |
| end_offset = surface_end; |
| } |
| |
| return end_offset; |
| } |
| |
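| /* |
| * Count the tables needed at the level where one table maps |
| * 1ULL << address_bits of GFX address space, to cover all the bufs (which |
| * must be sorted by address). |
| */ |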
| static int |
| pgt_table_count(int address_bits, struct intel_buf **bufs, int buf_count) |
| { |
| uint64_t end; |
| int count; |
| int i; |
| |
| count = 0; |
| end = 0; |
| for (i = 0; i < buf_count; i++) { |
| struct intel_buf *buf = bufs[i]; |
| uint64_t start; |
| |
| /* We require bufs to be sorted. */ |
| igt_assert(i == 0 || |
| buf->addr.offset >= bufs[i - 1]->addr.offset + |
| intel_buf_size(bufs[i - 1])); |
| start = ALIGN_DOWN(buf->addr.offset, 1UL << address_bits); |
| |
| /* Avoid double counting for overlapping aligned bufs. */ |
| start = max(start, end); |
| |
| end = ALIGN(buf->addr.offset + last_buf_surface_end(buf), |
| 1UL << address_bits); |
| igt_assert(end >= start); |
| |
| count += (end - start) >> address_bits; |
| } |
| |
| return count; |
| } |
| |
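| /* |
| * Lay out the tables of each level within the pgtable buffer. Levels are |
| * allocated from the top down, so the top level table ends up at offset 0 |
| * (see pgt_populate_entries()). |
| */ |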
| static void |
| pgt_calc_size(struct pgtable *pgt, struct intel_buf **bufs, int buf_count) |
| { |
| int level; |
| |
| pgt->size = 0; |
| |
| for (level = pgt->levels - 1; level >= 0; level--) { |
| struct pgtable_level_info *li = &pgt->level_info[level]; |
| |
| li->alloc_base = ALIGN(pgt->size, li->desc->table_size); |
| li->alloc_ptr = li->alloc_base; |
| |
| li->table_count = pgt_table_count(li->desc->idx_shift + |
| li->desc->idx_bits, |
| bufs, buf_count); |
| |
| pgt->size = li->alloc_base + |
| li->table_count * li->desc->table_size; |
| } |
| } |
| |
| static uint64_t pgt_alloc_table(struct pgtable *pgt, int level) |
| { |
| struct pgtable_level_info *li = &pgt->level_info[level]; |
| uint64_t table; |
| |
| table = li->alloc_ptr; |
| li->alloc_ptr += li->desc->table_size; |
| |
| igt_assert(li->alloc_ptr <= |
| li->alloc_base + li->table_count * li->desc->table_size); |
| |
| return table; |
| } |
| |
| static int pgt_entry_index(struct pgtable *pgt, int level, uint64_t address) |
| { |
| const struct pgtable_level_desc *ld = pgt->level_info[level].desc; |
| uint64_t mask = BITMASK(ld->idx_shift + ld->idx_bits - 1, |
| ld->idx_shift); |
| |
| return (address & mask) >> ld->idx_shift; |
| } |
| |
| static uint64_t ptr_mask(struct pgtable *pgt, int level) |
| { |
| const struct pgtable_level_desc *ld = pgt->level_info[level].desc; |
| |
| return BITMASK(GFX_ADDRESS_BITS - 1, ld->entry_ptr_shift); |
| } |
| |
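| /* |
| * Return the offset (within the pgtable buffer) of the child table mapping |
| * @address at @level, allocating the child and pointing the parent entry at |
| * it (emitting a relocation against the pgtable buffer itself) if it doesn't |
| * exist yet. |
| */ |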
| static uint64_t |
| pgt_get_child_table(struct pgtable *pgt, uint64_t parent_table, |
| int level, uint64_t address, uint64_t flags) |
| { |
| uint64_t *parent_table_ptr; |
| int child_entry_idx; |
| uint64_t *child_entry_ptr; |
| uint64_t child_table; |
| |
| parent_table_ptr = pgt->ptr + parent_table; |
| child_entry_idx = pgt_entry_index(pgt, level, address); |
| child_entry_ptr = &parent_table_ptr[child_entry_idx]; |
| |
| if (!*child_entry_ptr) { |
| uint64_t pte; |
| uint32_t offset; |
| |
| child_table = pgt_alloc_table(pgt, level - 1); |
| igt_assert(!((child_table + pgt->buf->addr.offset) & |
| ~ptr_mask(pgt, level))); |
| |
| pte = child_table | flags; |
| *child_entry_ptr = pgt->buf->addr.offset + pte; |
| |
| igt_assert(pte <= INT32_MAX); |
| |
| offset = parent_table + child_entry_idx * sizeof(uint64_t); |
| intel_bb_offset_reloc_to_object(pgt->ibb, |
| pgt->buf->handle, |
| pgt->buf->handle, |
| 0, 0, |
| pte, offset, |
| pgt->buf->addr.offset); |
| } else { |
| child_table = (*child_entry_ptr & ptr_mask(pgt, level)) - |
| pgt->buf->addr.offset; |
| } |
| |
| return child_table; |
| } |
| |
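| /* |
| * Write the L1 entry that maps @address on the main surface to @ptr, the |
| * corresponding address on the AUX CCS surface. |
| */ |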
| static void |
| pgt_set_l1_entry(struct pgtable *pgt, uint64_t l1_table, |
| uint64_t address, uint64_t ptr, uint64_t flags) |
| { |
| uint64_t *l1_table_ptr; |
| uint64_t *l1_entry_ptr; |
| |
| l1_table_ptr = pgt->ptr + l1_table; |
| l1_entry_ptr = &l1_table_ptr[pgt_entry_index(pgt, 0, address)]; |
| |
| igt_assert(!(ptr & ~ptr_mask(pgt, 0))); |
| *l1_entry_ptr = ptr | flags; |
| } |
| |
| #define DEPTH_VAL_RESERVED 3 |
| |
| static int bpp_to_depth_val(int bpp) |
| { |
| switch (bpp) { |
| case 8: |
| return 4; |
| case 10: |
| return 1; |
| case 12: |
| return 2; |
| case 16: |
| return 0; |
| case 32: |
| return 5; |
| case 64: |
| return 6; |
| default: |
| igt_assert_f(0, "invalid bpp %d\n", bpp); |
| } |
| } |
| |
| static uint64_t pgt_get_l1_flags(const struct intel_buf *buf, int surface_idx) |
| { |
| /* |
| * The offset of .tile_mode isn't specified by the bspec; it's what |
| * Mesa uses. |
| */ |
| union { |
| struct { |
| uint64_t valid:1; |
| uint64_t compression_mod:2; |
| uint64_t lossy_compression:1; |
| uint64_t pad:4; |
| uint64_t addr:40; |
| uint64_t pad2:4; |
| uint64_t tile_mode:2; |
| uint64_t depth:3; |
| uint64_t ycr:1; |
| uint64_t format:6; |
| } e; |
| uint64_t l; |
| } entry = { |
| .e = { |
| .valid = 1, |
| .tile_mode = buf->tiling == I915_TILING_Y ? 1 : |
| (buf->tiling == I915_TILING_4 ? 2 : 0), |
| } |
| }; |
| |
| /* |
| * TODO: Clarify if Yf is supported and if we need to differentiate |
| * Ys and Yf. |
| * Add support for more formats. |
| */ |
| igt_assert(buf->tiling == I915_TILING_Y || |
| buf->tiling == I915_TILING_Yf || |
| buf->tiling == I915_TILING_Ys || |
| buf->tiling == I915_TILING_4); |
| |
| entry.e.ycr = surface_idx > 0; |
| |
| if (buf->format_is_yuv_semiplanar) { |
| entry.e.depth = bpp_to_depth_val(buf->bpp); |
| switch (buf->yuv_semiplanar_bpp) { |
| case 8: |
| entry.e.format = AUX_FORMAT_NV12_21; |
| entry.e.depth = DEPTH_VAL_RESERVED; |
| break; |
| case 10: |
| entry.e.format = AUX_FORMAT_P010; |
| entry.e.depth = bpp_to_depth_val(10); |
| break; |
| case 12: |
| entry.e.format = AUX_FORMAT_P016; |
| entry.e.depth = bpp_to_depth_val(12); |
| break; |
| case 16: |
| entry.e.format = AUX_FORMAT_P016; |
| entry.e.depth = bpp_to_depth_val(16); |
| break; |
| default: |
| igt_assert(0); |
| } |
| } else if (buf->format_is_yuv) { |
| switch (buf->bpp) { |
| case 16: |
| entry.e.format = AUX_FORMAT_YCRCB; |
| entry.e.depth = DEPTH_VAL_RESERVED; |
| break; |
| case 32: |
| entry.e.format = AUX_FORMAT_AYUV; |
| entry.e.depth = DEPTH_VAL_RESERVED; |
| break; |
| default: |
| igt_assert(0); |
| } |
| } else { |
| switch (buf->bpp) { |
| case 32: |
| entry.e.format = AUX_FORMAT_ARGB_8B; |
| entry.e.depth = bpp_to_depth_val(32); |
| break; |
| default: |
| igt_assert(0); |
| } |
| } |
| |
| return entry.l; |
| } |
| |
| static uint64_t pgt_get_lx_flags(void) |
| { |
| union { |
| struct { |
| uint64_t valid:1; |
| uint64_t addr:47; |
| uint64_t pad:16; |
| } e; |
| uint64_t l; |
| } entry = { |
| .e = { |
| .valid = 1, |
| } |
| }; |
| |
| return entry.l; |
| } |
| |
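| /* |
| * Walk surface @surface_idx of @buf in main-surface-block sized steps, |
| * creating intermediate level tables on demand and filling in the L1 entries |
| * pointing at the matching AUX CCS blocks. |
| */ |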
| static void |
| pgt_populate_entries_for_buf(struct pgtable *pgt, |
| struct intel_buf *buf, |
| uint64_t top_table, |
| int surface_idx) |
| { |
| uint64_t surface_addr = buf->addr.offset + buf->surface[surface_idx].offset; |
| uint64_t surface_end = surface_addr + buf->surface[surface_idx].size; |
| uint64_t aux_addr = buf->addr.offset + buf->ccs[surface_idx].offset; |
| uint64_t l1_flags = pgt_get_l1_flags(buf, surface_idx); |
| uint64_t lx_flags = pgt_get_lx_flags(); |
| uint64_t aux_ccs_block_size = 1 << pgt->level_info[0].desc->entry_ptr_shift; |
| |
| /* |
| * The block size on the main surface mapped by one AUX CCS block: |
| * CCS block size * |
| * 8 bits per byte / |
| * 2 bits per main surface CL * |
| * 64 bytes per main surface CL |
| */ |
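| /* |
| * For instance, with the TGL level descriptors in intel_aux_pgtable_create() |
| * the L1 entry_ptr_shift of 8 gives 256 byte CCS blocks, so each L1 entry |
| * maps 256 * 8 / 2 * 64 = 64 kB of main surface, matching the L1 idx_shift |
| * of 16. |
| */ |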
| uint64_t main_surface_block_size = aux_ccs_block_size * 8 / 2 * 64; |
| |
| igt_assert(!(buf->surface[surface_idx].stride % 512)); |
| igt_assert_eq(buf->ccs[surface_idx].stride, |
| buf->surface[surface_idx].stride / 512 * 64); |
| |
| for (; surface_addr < surface_end; |
| surface_addr += main_surface_block_size, |
| aux_addr += aux_ccs_block_size) { |
| uint64_t table = top_table; |
| int level; |
| |
| for (level = pgt->levels - 1; level >= 1; level--) |
| table = pgt_get_child_table(pgt, table, level, |
| surface_addr, lx_flags); |
| |
| pgt_set_l1_entry(pgt, table, surface_addr, aux_addr, l1_flags); |
| } |
| } |
| |
| static void pgt_map(int i915, struct pgtable *pgt) |
| { |
| pgt->ptr = gem_mmap__device_coherent(i915, pgt->buf->handle, 0, |
| pgt->size, PROT_READ | PROT_WRITE); |
| } |
| |
| static void pgt_unmap(struct pgtable *pgt) |
| { |
| munmap(pgt->ptr, pgt->size); |
| } |
| |
| static void pgt_populate_entries(struct pgtable *pgt, |
| struct intel_buf **bufs, |
| int buf_count) |
| { |
| uint64_t top_table; |
| int i; |
| |
| top_table = pgt_alloc_table(pgt, pgt->levels - 1); |
| /* Top level table must be at offset 0. */ |
| igt_assert(top_table == 0); |
| |
| for (i = 0; i < buf_count; i++) { |
| igt_assert_eq(bufs[i]->surface[0].offset, 0); |
| |
| pgt_populate_entries_for_buf(pgt, bufs[i], top_table, 0); |
| if (bufs[i]->format_is_yuv_semiplanar) |
| pgt_populate_entries_for_buf(pgt, bufs[i], top_table, 1); |
| } |
| } |
| |
| static struct pgtable * |
| pgt_create(const struct pgtable_level_desc *level_descs, int levels, |
| struct intel_buf **bufs, int buf_count) |
| { |
| struct pgtable *pgt; |
| int level; |
| |
| pgt = calloc(1, sizeof(*pgt)); |
| igt_assert(pgt); |
| |
| pgt->levels = levels; |
| |
| pgt->level_info = calloc(levels, sizeof(*pgt->level_info)); |
| igt_assert(pgt->level_info); |
| |
| for (level = 0; level < pgt->levels; level++) { |
| struct pgtable_level_info *li = &pgt->level_info[level]; |
| |
| li->desc = &level_descs[level]; |
| if (li->desc->table_size > pgt->max_align) |
| pgt->max_align = li->desc->table_size; |
| } |
| |
| pgt_calc_size(pgt, bufs, buf_count); |
| |
| return pgt; |
| } |
| |
| static void pgt_destroy(struct pgtable *pgt) |
| { |
| free(pgt->level_info); |
| free(pgt); |
| } |
| |
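| /* |
| * Create a buffer holding the AUX page table that maps the main surfaces of |
| * @bufs to their CCS surfaces. All bufs must already have a GTT address |
| * assigned and must be passed sorted by that address. |
| */ |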
| struct intel_buf * |
| intel_aux_pgtable_create(struct intel_bb *ibb, |
| struct intel_buf **bufs, int buf_count) |
| { |
| static const struct pgtable_level_desc level_desc_table_tgl[] = { |
| { |
| .idx_shift = 16, |
| .idx_bits = 8, |
| .entry_ptr_shift = 8, |
| .table_size = 8 * 1024, |
| }, |
| { |
| .idx_shift = 24, |
| .idx_bits = 12, |
| .entry_ptr_shift = 13, |
| .table_size = 32 * 1024, |
| }, |
| { |
| .idx_shift = 36, |
| .idx_bits = 12, |
| .entry_ptr_shift = 15, |
| .table_size = 32 * 1024, |
| } |
| }; |
| static const struct pgtable_level_desc level_desc_table_mtl[] = { |
| { |
| .idx_shift = 20, |
| .idx_bits = 4, |
| .entry_ptr_shift = 12, |
| .table_size = 8 * 1024, |
| }, |
| { |
| .idx_shift = 24, |
| .idx_bits = 12, |
| .entry_ptr_shift = 11, |
| .table_size = 32 * 1024, |
| }, |
| { |
| .idx_shift = 36, |
| .idx_bits = 12, |
| .entry_ptr_shift = 15, |
| .table_size = 32 * 1024, |
| }, |
| }; |
| |
| const struct pgtable_level_desc *level_desc; |
| uint32_t levels; |
| struct pgtable *pgt; |
| struct buf_ops *bops; |
| struct intel_buf *buf; |
| |
| igt_assert(buf_count); |
| bops = bufs[0]->bops; |
| |
| if (IS_METEORLAKE(ibb->devid)) { |
| level_desc = level_desc_table_mtl; |
| levels = ARRAY_SIZE(level_desc_table_mtl); |
| } else { |
| level_desc = level_desc_table_tgl; |
| levels = ARRAY_SIZE(level_desc_table_tgl); |
| } |
| |
| pgt = pgt_create(&level_desc[0], levels, bufs, buf_count); |
| pgt->ibb = ibb; |
| pgt->buf = intel_buf_create(bops, pgt->size, 1, 8, 0, I915_TILING_NONE, |
| I915_COMPRESSION_NONE); |
| |
| /* The AUX table buffer must be bound with pgt->max_align alignment */ |
| intel_bb_add_intel_buf_with_alignment(ibb, pgt->buf, |
| pgt->max_align, false); |
| |
| pgt_map(ibb->fd, pgt); |
| pgt_populate_entries(pgt, bufs, buf_count); |
| pgt_unmap(pgt); |
| |
| buf = pgt->buf; |
| pgt_destroy(pgt); |
| |
| return buf; |
| } |
| |
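| /* |
| * Insert @new_buf into @bufs (holding @buf_count bufs), keeping the array |
| * sorted by GTT address. |
| */ |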
| static void |
| aux_pgtable_reserve_buf_slot(struct intel_buf **bufs, int buf_count, |
| struct intel_buf *new_buf) |
| { |
| int i; |
| |
| for (i = 0; i < buf_count; i++) { |
| if (bufs[i]->addr.offset > new_buf->addr.offset) |
| break; |
| } |
| |
| memmove(&bufs[i + 1], &bufs[i], sizeof(bufs[0]) * (buf_count - i)); |
| |
| bufs[i] = new_buf; |
| } |
| |
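| /* |
| * Prepare @info for a blit between @src_buf and @dst_buf: add the bufs to |
| * the batch, pin the compressed ones at their current offsets and build the |
| * AUX page table for the compressed ones. A no-op if neither buf is |
| * compressed. |
| * |
| * A rough sketch of the expected call order from a test (surrounding state |
| * setup and the blit emission itself are omitted): |
| * |
| * struct aux_pgtable_info info = {}; |
| * uint32_t state; |
| * |
| * gen12_aux_pgtable_init(&info, ibb, src, dst); |
| * state = gen12_create_aux_pgtable_state(ibb, info.pgtable_buf); |
| * gen12_emit_aux_pgtable_state(ibb, state, true); |
| * ... emit and execute the copy ... |
| * gen12_aux_pgtable_cleanup(ibb, &info); |
| */ |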
| void |
| gen12_aux_pgtable_init(struct aux_pgtable_info *info, |
| struct intel_bb *ibb, |
| struct intel_buf *src_buf, |
| struct intel_buf *dst_buf) |
| { |
| struct intel_buf *bufs[2]; |
| int buf_count = 0; |
| struct intel_buf *reserved_bufs[2]; |
| int reserved_buf_count; |
| bool has_compressed_buf = false; |
| bool write_buf[2]; |
| int i; |
| |
| igt_assert_f(ibb->enforce_relocs == false, |
| "We support aux pgtables for non-forced relocs yet!"); |
| |
| if (src_buf) { |
| bufs[buf_count] = src_buf; |
| write_buf[buf_count] = false; |
| buf_count++; |
| |
| if (intel_buf_compressed(src_buf)) |
| has_compressed_buf = true; |
| } |
| if (dst_buf) { |
| bufs[buf_count] = dst_buf; |
| write_buf[buf_count] = true; |
| buf_count++; |
| |
| if (intel_buf_compressed(dst_buf)) |
| has_compressed_buf = true; |
| } |
| |
| if (!has_compressed_buf) |
| return; |
| |
| /* |
| * The surface index in the pgt table depends on the surface's address, |
| * so: |
| * 1. if the handle was previously executed in a batch, use that address, |
| * 2. otherwise add the object to the batch, which assigns it a random |
| * address. |
| * |
| * Randomized addresses can overlap, as we don't have a global address |
| * space allocator in IGT. The current assumption is that the random |
| * addresses are spread evenly over the 48-bit address space, so the risk |
| * of an overlap is minimal, although it grows with the number (and size) |
| * of the objects involved in the blit. |
| * To avoid relocation, the EXEC_OBJECT_PINNED flag is set for compressed |
| * surfaces. |
| */ |
| |
| for (i = 0; i < buf_count; i++) { |
| intel_bb_add_intel_buf(ibb, bufs[i], write_buf[i]); |
| if (intel_buf_compressed(bufs[i])) |
| intel_bb_object_set_flag(ibb, bufs[i]->handle, EXEC_OBJECT_PINNED); |
| } |
| |
| reserved_buf_count = 0; |
| /* Reserve a slot for each buf, keeping them sorted by GTT address. */ |
| for (i = 0; i < buf_count; i++) { |
| igt_assert(bufs[i]->addr.offset != INTEL_BUF_INVALID_ADDRESS); |
| aux_pgtable_reserve_buf_slot(reserved_bufs, |
| reserved_buf_count++, |
| bufs[i]); |
| } |
| |
| /* Create AUX pgtable entries only for bufs with an AUX surface */ |
| info->buf_count = 0; |
| for (i = 0; i < reserved_buf_count; i++) { |
| if (!intel_buf_compressed(reserved_bufs[i])) |
| continue; |
| |
| info->bufs[info->buf_count] = reserved_bufs[i]; |
| info->buf_pin_offsets[info->buf_count] = |
| reserved_bufs[i]->addr.offset; |
| |
| info->buf_count++; |
| } |
| |
| info->pgtable_buf = intel_aux_pgtable_create(ibb, |
| info->bufs, |
| info->buf_count); |
| |
| igt_assert(info->pgtable_buf); |
| } |
| |
| void |
| gen12_aux_pgtable_cleanup(struct intel_bb *ibb, struct aux_pgtable_info *info) |
| { |
| int i; |
| |
| /* Check that the pinned bufs kept their offset after the exec. */ |
| for (i = 0; i < info->buf_count; i++) { |
| uint64_t addr; |
| |
| addr = intel_bb_get_object_offset(ibb, info->bufs[i]->handle); |
| igt_assert_eq_u64(addr, info->buf_pin_offsets[i]); |
| } |
| |
| if (info->pgtable_buf) { |
| intel_bb_remove_intel_buf(ibb, info->pgtable_buf); |
| intel_buf_destroy(info->pgtable_buf); |
| } |
| } |
| |
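| /* |
| * Write the relocated GFX address of @aux_pgtable_buf at the current batch |
| * pointer and return the batch offset of that qword, or 0 if there is no |
| * AUX page table. |
| */ |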
| uint32_t |
| gen12_create_aux_pgtable_state(struct intel_bb *ibb, |
| struct intel_buf *aux_pgtable_buf) |
| { |
| uint64_t *pgtable_ptr; |
| uint32_t pgtable_ptr_offset; |
| |
| if (!aux_pgtable_buf) |
| return 0; |
| |
| pgtable_ptr = intel_bb_ptr(ibb); |
| pgtable_ptr_offset = intel_bb_offset(ibb); |
| |
| *pgtable_ptr = intel_bb_offset_reloc(ibb, aux_pgtable_buf->handle, |
| 0, 0, |
| pgtable_ptr_offset, |
| aux_pgtable_buf->addr.offset); |
| intel_bb_ptr_add(ibb, sizeof(*pgtable_ptr)); |
| |
| return pgtable_ptr_offset; |
| } |
| |
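| /* |
| * Emit the MI_LOAD_REGISTER_MEM pair loading the 64 bit AUX table base |
| * address register (GFX or VEBOX variant) from the qword written at batch |
| * offset @state by gen12_create_aux_pgtable_state(). |
| */ |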
| void |
| gen12_emit_aux_pgtable_state(struct intel_bb *ibb, uint32_t state, bool render) |
| { |
| uint32_t table_base_reg; |
| |
| if (render) { |
| table_base_reg = GEN12_GFX_AUX_TABLE_BASE_ADDR; |
| } else { |
| /* Vebox */ |
| if (IS_METEORLAKE(ibb->devid)) |
| table_base_reg = 0x380000 + GEN12_VEBOX_AUX_TABLE_BASE_ADDR; |
| else |
| table_base_reg = GEN12_VEBOX_AUX_TABLE_BASE_ADDR; |
| } |
| |
| if (!state) |
| return; |
| |
| intel_bb_out(ibb, MI_LOAD_REGISTER_MEM_CMD | MI_MMIO_REMAP_ENABLE_GEN12 | 2); |
| intel_bb_out(ibb, table_base_reg); |
| intel_bb_emit_reloc(ibb, ibb->handle, 0, 0, state, ibb->batch_offset); |
| |
| intel_bb_out(ibb, MI_LOAD_REGISTER_MEM_CMD | MI_MMIO_REMAP_ENABLE_GEN12 | 2); |
| intel_bb_out(ibb, table_base_reg + 4); |
| intel_bb_emit_reloc(ibb, ibb->handle, 0, 0, state + 4, ibb->batch_offset); |
| } |