| /* |
| * Copyright © 2013-2015 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "brw_vec4_surface_builder.h" |
| |
| using namespace brw; |
| |
| namespace { |
| namespace array_utils { |
| /** |
| * Copy one every \p src_stride logical components of the argument into |
| * one every \p dst_stride logical components of the result. |
| */ |
| src_reg |
| emit_stride(const vec4_builder &bld, const src_reg &src, unsigned size, |
| unsigned dst_stride, unsigned src_stride) |
| { |
| if (src_stride == 1 && dst_stride == 1) { |
| return src; |
| } else { |
| const dst_reg dst = bld.vgrf(src.type, |
| DIV_ROUND_UP(size * dst_stride, 4)); |
| |
| for (unsigned i = 0; i < size; ++i) |
| bld.MOV(writemask(offset(dst, 8, i * dst_stride / 4), |
| 1 << (i * dst_stride % 4)), |
| swizzle(offset(src, 8, i * src_stride / 4), |
| brw_swizzle_for_mask(1 << (i * src_stride % 4)))); |
| |
| return src_reg(dst); |
| } |
| } |
| |
| /** |
| * Convert a VEC4 into an array of registers with the layout expected by |
| * the recipient shared unit. If \p has_simd4x2 is true the argument is |
| * left unmodified in SIMD4x2 form, otherwise it will be rearranged into |
| * a SIMD8 vector. |
| */ |
| src_reg |
| emit_insert(const vec4_builder &bld, const src_reg &src, |
| unsigned n, bool has_simd4x2) |
| { |
| if (src.file == BAD_FILE || n == 0) { |
| return src_reg(); |
| |
| } else { |
| /* Pad unused components with zeroes. */ |
| const unsigned mask = (1 << n) - 1; |
| const dst_reg tmp = bld.vgrf(src.type); |
| |
| bld.MOV(writemask(tmp, mask), src); |
| if (n < 4) |
| bld.MOV(writemask(tmp, ~mask), brw_imm_d(0)); |
| |
| return emit_stride(bld, src_reg(tmp), n, has_simd4x2 ? 1 : 4, 1); |
| } |
| } |
| |
| /** |
| * Convert an array of registers back into a VEC4 according to the |
| * layout expected from some shared unit. If \p has_simd4x2 is true the |
| * argument is left unmodified in SIMD4x2 form, otherwise it will be |
| * rearranged from SIMD8 form. |
| */ |
| src_reg |
| emit_extract(const vec4_builder &bld, const src_reg src, |
| unsigned n, bool has_simd4x2) |
| { |
| if (src.file == BAD_FILE || n == 0) { |
| return src_reg(); |
| |
| } else { |
| return emit_stride(bld, src, n, 1, has_simd4x2 ? 1 : 4); |
| } |
| } |
| } |
| } |
| |
| namespace brw { |
| namespace surface_access { |
| namespace { |
| using namespace array_utils; |
| |
| /** |
| * Generate a send opcode for a surface message and return the |
| * result. |
| */ |
| src_reg |
| emit_send(const vec4_builder &bld, enum opcode op, |
| const src_reg &header, |
| const src_reg &addr, unsigned addr_sz, |
| const src_reg &src, unsigned src_sz, |
| const src_reg &surface, |
| unsigned arg, unsigned ret_sz, |
| brw_predicate pred = BRW_PREDICATE_NONE) |
| { |
| /* Calculate the total number of components of the payload. */ |
| const unsigned header_sz = (header.file == BAD_FILE ? 0 : 1); |
| const unsigned sz = header_sz + addr_sz + src_sz; |
| |
| /* Construct the payload. */ |
| const dst_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz); |
| unsigned n = 0; |
| |
| if (header_sz) |
| bld.exec_all().MOV(offset(payload, 8, n++), |
| retype(header, BRW_REGISTER_TYPE_UD)); |
| |
| for (unsigned i = 0; i < addr_sz; i++) |
| bld.MOV(offset(payload, 8, n++), |
| offset(retype(addr, BRW_REGISTER_TYPE_UD), 8, i)); |
| |
| for (unsigned i = 0; i < src_sz; i++) |
| bld.MOV(offset(payload, 8, n++), |
| offset(retype(src, BRW_REGISTER_TYPE_UD), 8, i)); |
| |
| /* Reduce the dynamically uniform surface index to a single |
| * scalar. |
| */ |
| const src_reg usurface = bld.emit_uniformize(surface); |
| |
| /* Emit the message send instruction. */ |
| const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD, ret_sz); |
| vec4_instruction *inst = |
| bld.emit(op, dst, src_reg(payload), usurface, brw_imm_ud(arg)); |
| inst->mlen = sz; |
| inst->size_written = ret_sz * REG_SIZE; |
| inst->header_size = header_sz; |
| inst->predicate = pred; |
| |
| return src_reg(dst); |
| } |
| } |
| |
| /** |
| * Emit an untyped surface read opcode. \p dims determines the number |
| * of components of the address and \p size the number of components of |
| * the returned value. |
| */ |
| src_reg |
| emit_untyped_read(const vec4_builder &bld, |
| const src_reg &surface, const src_reg &addr, |
| unsigned dims, unsigned size, |
| brw_predicate pred) |
| { |
| return emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_READ, src_reg(), |
| emit_insert(bld, addr, dims, true), 1, |
| src_reg(), 0, |
| surface, size, 1, pred); |
| } |
| |
| /** |
| * Emit an untyped surface write opcode. \p dims determines the number |
| * of components of the address and \p size the number of components of |
| * the argument. |
| */ |
| void |
| emit_untyped_write(const vec4_builder &bld, const src_reg &surface, |
| const src_reg &addr, const src_reg &src, |
| unsigned dims, unsigned size, |
| brw_predicate pred) |
| { |
| const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || |
| bld.shader->devinfo->is_haswell); |
| emit_send(bld, SHADER_OPCODE_UNTYPED_SURFACE_WRITE, src_reg(), |
| emit_insert(bld, addr, dims, has_simd4x2), |
| has_simd4x2 ? 1 : dims, |
| emit_insert(bld, src, size, has_simd4x2), |
| has_simd4x2 ? 1 : size, |
| surface, size, 0, pred); |
| } |
| |
| /** |
| * Emit an untyped surface atomic opcode. \p dims determines the number |
| * of components of the address and \p rsize the number of components of |
| * the returned value (either zero or one). |
| */ |
| src_reg |
| emit_untyped_atomic(const vec4_builder &bld, |
| const src_reg &surface, const src_reg &addr, |
| const src_reg &src0, const src_reg &src1, |
| unsigned dims, unsigned rsize, unsigned op, |
| brw_predicate pred) |
| { |
| const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || |
| bld.shader->devinfo->is_haswell); |
| |
| /* Zip the components of both sources, they are represented as the X |
| * and Y components of the same vector. |
| */ |
| const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); |
| const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); |
| |
| if (size >= 1) |
| bld.MOV(writemask(srcs, WRITEMASK_X), src0); |
| if (size >= 2) |
| bld.MOV(writemask(srcs, WRITEMASK_Y), src1); |
| |
| return emit_send(bld, SHADER_OPCODE_UNTYPED_ATOMIC, src_reg(), |
| emit_insert(bld, addr, dims, has_simd4x2), |
| has_simd4x2 ? 1 : dims, |
| emit_insert(bld, src_reg(srcs), size, has_simd4x2), |
| has_simd4x2 && size ? 1 : size, |
| surface, op, rsize, pred); |
| } |
| |
| namespace { |
| /** |
| * Initialize the header present in typed surface messages. |
| */ |
| src_reg |
| emit_typed_message_header(const vec4_builder &bld) |
| { |
| const vec4_builder ubld = bld.exec_all(); |
| const dst_reg dst = bld.vgrf(BRW_REGISTER_TYPE_UD); |
| |
| ubld.MOV(dst, brw_imm_d(0)); |
| |
| if (bld.shader->devinfo->gen == 7 && |
| !bld.shader->devinfo->is_haswell) { |
| /* The sample mask is used on IVB for the SIMD8 messages that |
| * have no SIMD4x2 variant. We only use the two X channels |
| * in that case, mask everything else out. |
| */ |
| ubld.MOV(writemask(dst, WRITEMASK_W), brw_imm_d(0x11)); |
| } |
| |
| return src_reg(dst); |
| } |
| } |
| |
| /** |
| * Emit a typed surface read opcode. \p dims determines the number of |
| * components of the address and \p size the number of components of the |
| * returned value. |
| */ |
| src_reg |
| emit_typed_read(const vec4_builder &bld, const src_reg &surface, |
| const src_reg &addr, unsigned dims, unsigned size) |
| { |
| const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || |
| bld.shader->devinfo->is_haswell); |
| const src_reg tmp = |
| emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_READ, |
| emit_typed_message_header(bld), |
| emit_insert(bld, addr, dims, has_simd4x2), |
| has_simd4x2 ? 1 : dims, |
| src_reg(), 0, |
| surface, size, |
| has_simd4x2 ? 1 : size); |
| |
| return emit_extract(bld, tmp, size, has_simd4x2); |
| } |
| |
| /** |
| * Emit a typed surface write opcode. \p dims determines the number of |
| * components of the address and \p size the number of components of the |
| * argument. |
| */ |
| void |
| emit_typed_write(const vec4_builder &bld, const src_reg &surface, |
| const src_reg &addr, const src_reg &src, |
| unsigned dims, unsigned size) |
| { |
| const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || |
| bld.shader->devinfo->is_haswell); |
| emit_send(bld, SHADER_OPCODE_TYPED_SURFACE_WRITE, |
| emit_typed_message_header(bld), |
| emit_insert(bld, addr, dims, has_simd4x2), |
| has_simd4x2 ? 1 : dims, |
| emit_insert(bld, src, size, has_simd4x2), |
| has_simd4x2 ? 1 : size, |
| surface, size, 0); |
| } |
| |
| /** |
| * Emit a typed surface atomic opcode. \p dims determines the number of |
| * components of the address and \p rsize the number of components of |
| * the returned value (either zero or one). |
| */ |
| src_reg |
| emit_typed_atomic(const vec4_builder &bld, |
| const src_reg &surface, const src_reg &addr, |
| const src_reg &src0, const src_reg &src1, |
| unsigned dims, unsigned rsize, unsigned op, |
| brw_predicate pred) |
| { |
| const bool has_simd4x2 = (bld.shader->devinfo->gen >= 8 || |
| bld.shader->devinfo->is_haswell); |
| |
| /* Zip the components of both sources, they are represented as the X |
| * and Y components of the same vector. |
| */ |
| const unsigned size = (src0.file != BAD_FILE) + (src1.file != BAD_FILE); |
| const dst_reg srcs = bld.vgrf(BRW_REGISTER_TYPE_UD); |
| |
| if (size >= 1) |
| bld.MOV(writemask(srcs, WRITEMASK_X), src0); |
| if (size >= 2) |
| bld.MOV(writemask(srcs, WRITEMASK_Y), src1); |
| |
| return emit_send(bld, SHADER_OPCODE_TYPED_ATOMIC, |
| emit_typed_message_header(bld), |
| emit_insert(bld, addr, dims, has_simd4x2), |
| has_simd4x2 ? 1 : dims, |
| emit_insert(bld, src_reg(srcs), size, has_simd4x2), |
| has_simd4x2 ? 1 : size, |
| surface, op, rsize, pred); |
| } |
| } |
| } |