blob: 9adaa8fcda9fa5c36fc0f5254f7ec54c454137d5 [file] [log] [blame]
#
# Copyright (C) 2020 Collabora, Ltd.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import sys
from isa_parse import parse_instructions, opname_to_c
from mako.template import Template
# Load the instruction table with isa_parse.parse_instructions, passing the
# first command-line argument as its input.
instructions = parse_instructions(sys.argv[1])
# Packs sources into an argument. Offset argument to work around a quirk of our
# compiler IR when dealing with staging registers (TODO: reorder in the IR to
# fix this)
def pack_sources(sources, body, pack_exprs, offset):
    """Emit the C statements that fetch, validate and pack each source.

    sources    -- sequence of entries indexed as (shift, mask): entry[0] is
                  the bit position the source is shifted to, entry[1] is a
                  bitmask tested against (1 << srcN); 0xFF disables the check
    body       -- list of C lines, appended to in place
    pack_exprs -- list of per-state expression lists; every state receives
                  the same shifted source expression
    offset     -- added to the source index passed to bi_get_src()
    """
    for idx, source in enumerate(sources):
        body.append('unsigned src{} = bi_get_src(ins, regs, {});'.format(idx, idx + offset))

        # Only emit a validity assertion when the mask actually restricts
        # something
        mask = source[1]
        if mask != 0xFF:
            body.append('assert((1 << src{}) & {});'.format(idx, hex(mask)))

        # The packed position of a source does not depend on the state
        for state_exprs in pack_exprs:
            state_exprs.append('(src{} << {})'.format(idx, source[0]))

    # Blank separator line after the source section
    body.append('')
# Gets the argument that the source modifier applies to from the name if
# applicable, otherwise defaults to the first argument
def mod_arg(mod):
    """Return the source index encoded as the last character of `mod`.

    A trailing '0'..'3' selects that argument; any other trailing character
    means the modifier applies to argument 0.
    """
    suffix = mod[-1]
    if suffix in "0123":
        return int(suffix)
    return 0
# Widen/lane/swz/swap/replicate modifiers conceptually act as a combined extend
# + swizzle. We look at the size of the argument to determine if we apply
# them, and look at the swizzle to pick which one.
def pack_widen(mod, opts, body, pack_exprs):
    """Emit an if/else chain selecting the widen/lane/swizzle encoding.

    The chain is keyed on the bit size of the source the modifier applies to
    and, for 16-bit ('h...') and 8-bit ('b...') options, on the swizzle lanes
    spelled out by the characters after the first one.  Options that are None
    or 'reserved' are skipped.

    mod        -- modifier name; also used as the prefix of the C temporaries
    opts       -- option names, indexed by their encoded value
    body       -- list of C lines, appended to in place
    pack_exprs -- unused here; kept for the common packer signature

    Returns the name of the C temporary holding the encoded value.
    """
    src_index = mod_arg(mod)

    body.append('unsigned {}_sz = nir_alu_type_get_type_size(ins->src_types[{}]);'.format(mod, src_index))
    body.append('unsigned {}_temp = 0;'.format(mod))

    emitted = False

    for encoding, op in enumerate(opts):
        if op is None or op == 'reserved':
            continue

        # Every branch after the first one chains with `else`
        prefix = 'else ' if emitted else ''
        emitted = True

        if op == 'none' or op == 'w0':
            line = '{}if ({}_sz == 32) {}_temp = {};'.format(prefix, mod, mod, encoding)
        elif op == 'd0':
            line = '{}if ({}_sz == 64) {}_temp = {};'.format(prefix, mod, mod, encoding)
        else:
            assert(op[0] in ['h', 'b'])
            bits = 16 if op[0] == 'h' else 8

            # One swizzle comparison per lane character following the size
            # letter
            lane_tests = []
            for idx, lane in enumerate(op[1:]):
                lane_tests.append('ins->swizzle[{}][{}] == {}'.format(src_index, idx, lane))

            line = '{}if ({}_sz == {} && {}) {}_temp = {};'.format(prefix, mod, bits, " && ".join(lane_tests), mod, encoding)

        body.append(line)

    body.append('else unreachable("Could not pattern match widen");')

    return mod + '_temp'
# abs/neg are stored in ins->src_{abs,neg}[src] arrays
def pack_absneg(mod, opts, body, pack_exprs):
    """Return the C expression reading an abs/neg flag from the IR.

    The array name is the modifier name with a trailing source digit
    ('0'..'3') removed; the index is the source the modifier applies to.
    opts, body and pack_exprs are unused.
    """
    array_name = mod
    if mod[-1] in "0123":
        array_name = mod[0:-1]

    return 'ins->src_{}[{}]'.format(array_name, mod_arg(mod))
# ins->roundmode is the native format (RTE/RTP/RTN/RTZ) for most ops. But there
# are some others we might encounter that we don't support in the IR at this
# point, and there are a few that force a subset of round modes.
def pack_round(mod, opts, body, pack_exprs):
    """Return the C expression encoding the rounding mode.

    Three option layouts are recognised:
      * ['none', 'rtz']  -- one bit; asserts the IR mode is RTE or RTZ
      * ['rtn', 'rtp']   -- one bit; asserts the IR mode is RTN or RTP
      * a list starting with ['none', 'rtp', 'rtn', 'rtz'] -- the IR value
        is used unchanged

    Any other layout trips an assertion.  Assertions on the IR value are
    appended to `body`; mod and pack_exprs are unused.
    """
    if opts == ['none', 'rtz']:
        body.append('assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);')
        return '(ins->roundmode == BIFROST_RTZ) ? 1 : 0'

    if opts == ['rtn', 'rtp']:
        body.append('assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);')
        return '(ins->roundmode == BIFROST_RTP) ? 1 : 0'

    if opts[0:4] == ['none', 'rtp', 'rtn', 'rtz']:
        return 'ins->roundmode'

    assert False
# Likewise, matches our native format
def pack_clamp(mod, opts, body, pack_exprs):
    """Return the C expression encoding the output clamp.

    With the full four-option list the IR value `ins->outmod` is used
    unchanged.  With ['none', 'clamp_0_1'] a single bit is produced and an
    assertion restricting the IR value to NONE or SAT is appended to `body`.
    Any other option list trips an assertion.  mod and pack_exprs are unused.
    """
    if opts == ['none', 'clamp_0_inf', 'clamp_m1_1', 'clamp_0_1']:
        return 'ins->outmod'

    if opts == ['none', 'clamp_0_1']:
        body.append('assert(ins->outmod == BIFROST_NONE || ins->outmod == BIFROST_SAT);')
        return '(ins->outmod == BIFROST_SAT) ? 1 : 0'

    assert False
# Our modifiers match up in name, but there is no shortage of orders. So just
# emit a table on the fly for it, since you won't get something much better.
# ENUM_BI_COND must be kept synced with `enum bi_cond` in compiler.h
ENUM_BI_COND = ["al", "lt", "le", "ge", "gt", "eq", "ne"]


def pack_cmpf(mod, opts, body, pack_exprs):
    """Emit a C lookup table translating `ins->cond` to the encoded value.

    The table has one entry per name in ENUM_BI_COND, in that order: the
    position of the name within `opts`, or the invalid sentinel ~0 when the
    instruction does not offer that comparison.  The table declaration is
    appended to `body`; mod and pack_exprs are unused.

    Returns the C expression indexing the table.
    """
    entries = []
    for cond in ENUM_BI_COND:
        if cond in opts:
            entries.append(str(opts.index(cond)))
        else:
            # Invalid sentinel for conditions this instruction lacks
            entries.append('~0')

    body.extend([
        'unsigned cmpf_table[] = {',
        ' ' + ', '.join(entries),
        '};',
    ])

    return 'cmpf_table[ins->cond]'
# Since our IR is explicitly typed, we look at the size/sign to determine sign
# extension behaviour
def pack_extend(mod, opts, body, pack_exprs):
    """Return the C expression encoding sign/zero extension for a source.

    Two C booleans are declared in `body`: `<mod>_small` (source type size
    is at most 16 bits) and `<mod>_signed` (source base type is
    nir_type_int).

      * ['none', 'sext', 'zext', 'reserved']: small sources encode 1 when
        signed and 2 otherwise; larger sources encode 0.
      * ['zext', 'sext']: the source must be small (asserted in C); encodes
        1 when signed and 0 otherwise.

    Any other option list trips an assertion.  pack_exprs is unused.
    """
    src_index = mod_arg(mod)

    body.append('ASSERTED bool {}_small = nir_alu_type_get_type_size(ins->src_types[{}]) <= 16;'.format(mod, src_index))
    body.append('bool {}_signed = nir_alu_type_get_base_type(ins->src_types[{}]) == nir_type_int;'.format(mod, src_index))

    if opts == ['none', 'sext', 'zext', 'reserved']:
        return '{}_small ? ({}_signed ? 1 : 2) : 0'.format(mod, mod)

    assert opts == ['zext', 'sext']
    body.append('assert({}_small);'.format(mod))
    return '{}_signed ? 1 : 0'.format(mod)
# Packs special varying loads. Our BIFROST_FRAGZ etc defines match the hw in
# the bottom two bits (TODO drop upper bits)
def pack_varying_name(mod, opts, body, pack_exprs):
    """Return the C expression encoding a special varying name.

    Checks that the option list has 'point', 'frag_w' and 'frag_z' at
    positions 0, 2 and 3, then takes the low two bits of the instruction
    constant.  mod, body and pack_exprs are unused.
    """
    for slot, expected in ((0, 'point'), (2, 'frag_w'), (3, 'frag_z')):
        assert opts[slot] == expected

    return 'ins->constant.u64 & 0x3'
def pack_not_src1(mod, opts, body, pack_exprs):
    """Return the C expression encoding inversion of the second source.

    Selects the position of 'not' in `opts` when
    `ins->bitwise.src1_invert` is set and the position of 'none' otherwise.
    mod, body and pack_exprs are unused.
    """
    inverted = opts.index('not')
    plain = opts.index('none')
    return 'ins->bitwise.src1_invert ? {} : {}'.format(inverted, plain)
def pack_not_result(mod, opts, body, pack_exprs):
    """Return the C expression encoding inversion of the result.

    Selects the position of 'not' in `opts` when
    `ins->bitwise.dest_invert` is set and the position of 'none' otherwise.
    mod, body and pack_exprs are unused.
    """
    inverted = opts.index('not')
    plain = opts.index('none')
    return 'ins->bitwise.dest_invert ? {} : {}'.format(inverted, plain)
# Maps register format option names to the C type constant that
# `ins->format` is compared against.
REGISTER_FORMATS = {
    'f64': 'nir_type_float64',
    'f32': 'nir_type_float32',
    'f16': 'nir_type_float16',
    'u64': 'nir_type_uint64',
    'u32': 'nir_type_uint32',
    'u16': 'nir_type_uint16',
    'i64': 'nir_type_int64',
    's32': 'nir_type_int32',
    's16': 'nir_type_int16'
}


def pack_register_format(mod, opts, body, pack_exprs):
    """Emit an if/else chain selecting the register format encoding.

    For every option with an entry in REGISTER_FORMATS a branch comparing
    `ins->format` against the mapped constant is appended to `body`; the
    branch stores the option's position in `opts`.  Options that are None,
    'reserved' or absent from REGISTER_FORMATS produce no branch.

    mod        -- modifier name; prefix of the C temporary
    opts       -- option names, indexed by their encoded value
    body       -- list of C lines, appended to in place
    pack_exprs -- unused; kept for the common packer signature

    Returns the name of the C temporary holding the encoded value.  An
    assertion fires if no branch could be emitted at all.
    """
    body.append('unsigned {}_temp = 0;'.format(mod))

    emitted = False

    for i, op in enumerate(opts):
        if op is None or op == 'reserved':
            continue

        nir_type = REGISTER_FORMATS.get(op)
        if not nir_type:
            # Unknown option: emit nothing, and crucially do not count it as
            # the first branch, otherwise the next branch would start with a
            # dangling `else`.
            continue

        t_else = 'else ' if emitted else ''
        emitted = True
        body.append('{}if (ins->format == {}) {}_temp = {};'.format(t_else, nir_type, mod, i))

    # The trailing `else` below needs at least one `if` to attach to
    assert emitted
    body.append('else unreachable("Could not pattern match register format");')

    return mod + '_temp'
def pack_seg(mod, opts, body, pack_exprs):
    """Return the C expression encoding the memory segment.

    With eight options the IR value `ins->segment` is used directly after
    asserting it is non-zero.  With ['none', 'wgl'] a single bit is produced
    and the IR value is asserted to be NONE or WLS.  Any other option list
    trips an assertion.  mod and pack_exprs are unused.
    """
    if len(opts) == 8:
        body.append('assert(ins->segment);')
        return 'ins->segment'

    if opts == ['none', 'wgl']:
        body.append('assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);')
        return 'ins->segment == BI_SEGMENT_WLS ? 1 : 0'

    assert(False)
# TODO: Update modes (perf / slow) For now just force store, except for special
# varyings for which we force clobber
def pack_update(mod, opts, body, pack_exprs):
    """Return the C expression encoding the update mode.

    With the full ['store', 'retrieve', 'conditional', 'clobber'] list the
    encoding is 3 when `ins->constant.u64 >= 20` and 0 otherwise.  Any other
    list must start with 'store' and always encodes 0.  mod, body and
    pack_exprs are unused.
    """
    full_set = ['store', 'retrieve', 'conditional', 'clobber']

    if opts != full_set:
        assert(opts[0] == 'store')
        return '0'

    return '(ins->constant.u64 >= 20) ? 3 : 0'
# Processes modifiers. If used directly, emits a pack. Otherwise, just
# processes the value (grabbing it from the IR). This must sync with the IR.
# Dispatch table from modifier name to the function producing its C value
# expression. Every entry is called as fn(mod, opts, body, pack_exprs); the
# lambdas below ignore the arguments they do not name.
modifier_map = {
# Combined extend + swizzle modifiers
"widen": pack_widen,
"widen0": pack_widen,
"widen1": pack_widen,
"lane": pack_widen,
"lane0": pack_widen,
"lane1": pack_widen,
"lane2": pack_widen,
"lane3": pack_widen,
"lanes0": pack_widen,
"lanes1": pack_widen,
"lanes2": pack_widen,
"swz": pack_widen,
"swz0": pack_widen,
"swz1": pack_widen,
"swz2": pack_widen,
"swap0": pack_widen,
"swap1": pack_widen,
"swap2": pack_widen,
"replicate0": pack_widen,
"replicate1": pack_widen,
# Per-source absolute value / negate flags
"abs": pack_absneg,
"abs0": pack_absneg,
"abs1": pack_absneg,
"abs2": pack_absneg,
"neg": pack_absneg,
"neg0": pack_absneg,
"neg1": pack_absneg,
"neg2": pack_absneg,
# Sign / zero extension derived from the source type
"extend": pack_extend,
"extend0": pack_extend,
"extend1": pack_extend,
"extend2": pack_extend,
"sign0": pack_extend,
"sign1": pack_extend,
# Modifiers with a dedicated packer
"clamp": pack_clamp,
"round": pack_round,
"cmpf": pack_cmpf,
"varying_name": pack_varying_name,
"not1": pack_not_src1,
"not_result": pack_not_result,
"register_format": pack_register_format,
"seg": pack_seg,
"update": pack_update,
# Just a minus one modifier
"vecsize": lambda a,b,c,d: 'ins->vector_channels - 1',
# 0: compute 1: zero
"lod_mode": lambda a,b,c,d: '1 - ins->texture.compute_lod',
"skip": lambda a,b,c,d: 'ins->skip',
# Not much choice in the matter...
"divzero": lambda a,b,c,d: '0',
"sem": lambda a,b,c,d: '0', # IEEE 754 compliant NaN rules
# We don't support these in the IR yet (TODO)
"saturate": lambda a,b,c,d: '0', # clamp to min/max int
"mask": lambda a,b,c,d: '0', # clz(~0) = ~0
"result_type": lambda a,opts,c,d: str(opts.index('m1')), # #1, #1.0, ~0 for cmp
"special": lambda a,b,c,d: '0', # none, which source wins..
"offset": lambda a,b,c,d: '0', # sin/cos thing
"adj": lambda a,b,c,d: '0', # sin/cos thing
"sqrt": lambda a,b,c,d: '0', # sin/cos thing
"log": lambda a,b,c,d: '1', # frexpe mode -- TODO: other transcendentals for g71
"scale": lambda a,b,c,d: '0', # sin/cos thing
"precision": lambda a,b,c,d: '0', # log thing
"mode": lambda a,b,c,d: '0', # log thing
"func": lambda a,b,c,d: '0', # pow special case thing
"h": lambda a,b,c,d: '0', # VN_ASST1.f16
"l": lambda a,b,c,d: '0', # VN_ASST1.f16
"sample": lambda a,b,c,d: '0', # LD_VAR center
"function": lambda a,b,c,d: '3', # LD_VAR_FLAT none
"preserve_null": lambda a,b,c,d: '0', # SEG_ADD none
"bytes2": lambda a,b,c,d: '0', # NIR shifts are in bits
"result_word": lambda a,b,c,d: '0', # 32-bit only shifts for now (TODO)
"source": lambda a,b,c,d: '7', # cycle_counter for LD_GCLK
"lane_op": lambda a,b,c,d: '0', # CLPER none
"subgroup": lambda a,b,c,d: '1', # CLPER subgroup4
"inactive_result": lambda a,b,c,d: '0', # CLPER zero
"threads": lambda a,b,c,d: '0', # IMULD odd
"stencil": lambda a,b,c,d: '1', # ZS_EMIT stencil
"z": lambda a,b,c,d: '1', # ZS_EMIT z
"combine": lambda a,b,c,d: '0', # BRANCHC any
"format": lambda a,b,c,d: '1', # LEA_TEX_IMM u32
"test_mode": lambda a,b,c,d: '0', # JUMP_EX z
"stack_mode": lambda a,b,c,d: '2', # JUMP_EX none
"atom_opc": lambda a,b,c,d: '2', # ATOM_C aadd
"mux": lambda a,b,c,d: '1', # MUX int_zero
}
def pack_modifier(mod, width, default, opts, body, pack_exprs):
    """Emit the C declaration computing one modifier's encoded value.

    Looks the modifier up in modifier_map, appends
    `unsigned <mod> = <expr>;` to `body` and makes sure the value fits in
    `width` bits.

    mod        -- modifier name; becomes the name of the C variable
    width      -- number of bits available for the encoded value
    default    -- unused here
    opts       -- option names, forwarded to the specific packer
    body       -- list of C lines, appended to in place
    pack_exprs -- forwarded to the specific packer

    Returns True on success, or None when the modifier has no entry in
    modifier_map.
    """
    # Invoke the specific one
    fn = modifier_map.get(mod)

    if fn is None:
        return None

    expr = fn(mod, opts, body, pack_exprs)
    body.append('unsigned {} = {};'.format(mod, expr))

    # Validate we don't overflow. A literal integer that fits needs no check
    # in the generated code; anything else (a runtime C expression, or a
    # literal that is too large) gets a C assertion. Parsing is kept separate
    # from the range test so that only the expected conversion failures are
    # handled and the check does not depend on Python assertions being on.
    limit = 1 << width

    try:
        constant = int(expr)
    except (TypeError, ValueError):
        constant = None

    if constant is None or constant >= limit:
        body.append('assert({} < {});'.format(mod, limit))

    body.append('')

    return True
# Compiles an S-expression (and/or/eq/neq, modifiers, `ordering`, immediates)
# into a C boolean expression suitable to stick in an if-statement. Takes an
# imm_map to map modifiers to immediate values, parametrized by the ctx that
# we're looking up in (the first, non-immediate argument of the equality)
# C spelling of each binary S-expression operator
SEXPR_BINARY = {
    "and": "&&",
    "or": "||",
    "eq": "==",
    "neq": "!="
}

# Comparisons against the pseudo-value `ordering`, keyed by (operator,
# immediate). These translate directly to a comparison of the first two
# sources.
_SEXPR_ORDERING = {
    ("eq", "#gt"): "(src0 > src1)",
    ("neq", "#lt"): "(src0 >= src1)",
    ("neq", "#gt"): "(src0 <= src1)",
    ("eq", "#lt"): "(src0 < src1)",
    ("eq", "#eq"): "(src0 == src1)",
    ("neq", "#eq"): "(src0 != src1)",
}


def compile_s_expr(expr, imm_map, ctx):
    """Compile an S-expression into a parenthesised C boolean expression.

    expr    -- either a string (a name, or an immediate written '#value') or
               a list [operator, operand, ...]; ['alias', e] compiles as e
    imm_map -- maps a modifier name to a dict from option name to its
               encoded value
    ctx     -- modifier name used to resolve '#value' immediates; for the
               operands of a list it is that list's first operand

    Returns the C source text.  Raises KeyError for an unknown operator or
    an immediate that cannot be resolved through imm_map.
    """
    if expr[0] == 'alias':
        return compile_s_expr(expr[1], imm_map, ctx)

    if isinstance(expr, list):
        # `ordering` is not a real modifier, so it has no imm_map entry and
        # must be intercepted before the generic path tries to resolve it.
        if (len(expr) == 3 and expr[1] == 'ordering'
                and isinstance(expr[0], str) and isinstance(expr[2], str)):
            comparison = _SEXPR_ORDERING.get((expr[0], expr[2]))
            if comparison is not None:
                return comparison

        sep = " {} ".format(SEXPR_BINARY[expr[0]])
        operands = [compile_s_expr(s, imm_map, expr[1]) for s in expr[1:]]
        return "(" + sep.join(operands) + ")"

    if expr[0] == '#':
        return str(imm_map[ctx][expr[1:]])

    return expr
# Packs a derived value. We just iterate through the possible choices and test
# whether the encoding matches, and if so we use it.
def pack_derived(pos, exprs, imm_map, body, pack_exprs):
    """Emit an if/else chain computing a derived field and pack it.

    pos        -- bit position of the field; also names the C temporary
                  `derived_<pos>`
    exprs      -- candidate conditions indexed by encoded value; None
                  entries are skipped
    imm_map    -- immediate lookup table handed to compile_s_expr
    body       -- list of C lines for this state, appended to in place
    pack_exprs -- expression list for this state; receives the shifted field

    At least one candidate must be present, otherwise an assertion fires.
    """
    body.append('unsigned derived_{} = 0;'.format(pos))

    matched_any = False

    for value, expr in enumerate(exprs):
        if expr is None:
            continue

        test = compile_s_expr(expr, imm_map, None)
        keyword = 'else ' if matched_any else ''
        body.append('{}if {} derived_{} = {};'.format(keyword, test, pos, value))
        matched_any = True

    assert (matched_any)
    body.append('else unreachable("No pattern match at pos {}");'.format(pos))
    body.append('')

    assert(pos is not None)
    pack_exprs.append('(derived_{} << {})'.format(pos, pos))
# Table mapping immediate names in the machine to expressions of `ins` to
# lookup the value in the IR, performing adjustments as needed
# Keys are immediate names; values are the C expression text assigned to the
# generated `unsigned <name>` variable. Immediates missing from this table
# make pack_variant give up on the instruction.
IMMEDIATE_TABLE = {
'attribute_index': 'bi_get_immediate(ins, 0)',
'varying_index': 'bi_get_immediate(ins, 0)',
'index': 'bi_get_immediate(ins, 0)',
'texture_index': 'ins->texture.texture_index',
'sampler_index': 'ins->texture.sampler_index',
'table': '63', # Bindless (flat addressing) mode for DTSEL_IMM
# Not supported in the IR (TODO)
'shift': '0',
'fill': '0', # WMASK
}
# Generates a routine to pack a single variant of a single instruction.
# The template applies the needed formatting and ORs together all the
# pack_exprs, which avoids using bit fields.
#
# Template inputs: `name` (suffix of the generated pan_pack_ function),
# `common_body` (C lines shared by every state), `states` (an iterable of
# (pack_exprs, state_body, condition) triples) and `single_state` (when true
# the state is emitted unconditionally, otherwise as an if/else chain on each
# state's condition).
#
# Argument swapping is sensitive to the order of operations. Dependencies:
# sources (RW), modifiers (RW), derived values (W). Hence we emit sources and
# modifiers first, then perform a swap if necessary overwriting
# sources/modifiers, and last calculate derived values and pack.
variant_template = Template("""static inline unsigned
pan_pack_${name}(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
${"\\n".join([(" " + x) for x in common_body])}
% if single_state:
% for (pack_exprs, s_body, _) in states:
${"\\n".join([" " + x for x in s_body + ["return {};".format( " | ".join(pack_exprs))]])}
% endfor
% else:
% for i, (pack_exprs, s_body, cond) in enumerate(states):
${'} else ' if i > 0 else ''}if ${cond} {
${"\\n".join([" " + x for x in s_body + ["return {};".format(" | ".join(pack_exprs))]])}
% endfor
} else {
unreachable("No matching state found in ${name}");
}
% endif
}
""")
def pack_variant(opname, states):
    """Generate the C packing routine for one instruction.

    opname -- instruction name; '+LD_VAR_SPECIAL' gets an extra source
              offset, and the name is converted with opname_to_c for the
              generated function
    states -- non-empty list of (condition, description) pairs. Each
              description is a dict with an "exact" entry and optional
              "staging", "srcs", "modifiers", "swaps", "immediates" and
              "derived" entries.

    Returns the rendered C source text, or None when the instruction uses a
    modifier missing from modifier_map or an immediate missing from
    IMMEDIATE_TABLE.
    """
    # Expressions to be ORed together for the final pack, an array per state
    pack_exprs = [[hex(state[1]["exact"][1])] for state in states]

    # Computations which need to be done to encode first, across states
    common_body = []

    # Map from modifier names to a map from modifier values to encoded values
    # String -> { String -> Uint }. This can be shared across states since
    # modifiers are (except the pos values) constant across state.
    imm_map = {}

    # Pack sources. Offset over to deal with staging/immediate weirdness in our
    # IR (TODO: reorder sources upstream so this goes away). Note sources are
    # constant across states.
    first_desc = states[0][1]
    staging = first_desc.get("staging", "")
    offset = 0
    if staging in ["r", "rw"]:
        offset += 1

    immediate_names = set([x[0] for x in first_desc.get("immediates", [])])
    offset += len(set(["attribute_index", "varying_index", "index"]) & immediate_names)

    if opname == '+LD_VAR_SPECIAL':
        offset += 1

    pack_sources(first_desc.get("srcs", []), common_body, pack_exprs, offset)

    # Compute each distinct modifier once, in the shared body
    modifiers_handled = set()
    for state in states:
        for ((mod, _, width), default, opts) in state[1].get("modifiers", []):
            if mod in modifiers_handled:
                continue

            modifiers_handled.add(mod)

            if pack_modifier(mod, width, default, opts, common_body, pack_exprs) is None:
                return None

            imm_map[mod] = { x: y for y, x in enumerate(opts) }

    # Only the bit position of a modifier varies per state
    for state_exprs, state in zip(pack_exprs, states):
        for ((mod, pos, width), default, opts) in state[1].get("modifiers", []):
            if pos is not None:
                state_exprs.append('({} << {})'.format(mod, pos))

    # Swaps and immediates are emitted once into the shared body; their
    # description is read from the last state.
    last_desc = states[-1][1]

    for ((src_a, src_b), cond, remap) in last_desc.get("swaps", []):
        # Figure out which vars to swap, in order to swap the arguments. This
        # always includes the sources themselves, and may include source
        # modifiers (with the same source indices). We swap based on which
        # matches A, this is arbitrary but if we swapped both nothing would end
        # up swapping at all since it would swap back.
        vars_to_swap = ['src']
        for ((mod, _, width), default, opts) in last_desc.get("modifiers", []):
            if mod[-1] in str(src_a):
                vars_to_swap.append(mod[0:-1])

        common_body.append('if {}'.format(compile_s_expr(cond, imm_map, None)) + ' {')

        # Emit the swaps. We use a temp, and wrap in a block to avoid naming
        # collisions with multiple swaps. {{Doubling}} to escape the format.
        for v in vars_to_swap:
            common_body.append(' {{ unsigned temp = {}{}; {}{} = {}{}; {}{} = temp; }}'.format(v, src_a, v, src_a, v, src_b, v, src_b))

        # Also, remap. Bidirectional swaps are explicit in the XML.
        for v in remap:
            maps = remap[v]
            imm = imm_map[v]

            for n, l in enumerate(maps):
                common_body.append(' {}if ({} == {}) {} = {};'.format('' if n == 0 else 'else ', v, imm[l], v, imm[maps[l]]))

        common_body.append('}')
        common_body.append('')

    for (name, pos, width) in last_desc.get("immediates", []):
        if name not in IMMEDIATE_TABLE:
            return None

        common_body.append('unsigned {} = {};'.format(name, IMMEDIATE_TABLE[name]))

        # Immediates sit at the same position in every state
        for state_exprs in pack_exprs:
            state_exprs.append('({} << {})'.format(name, pos))

    if staging == 'r':
        common_body.append('bi_read_staging_register(clause, ins);')
    elif staging == 'w':
        common_body.append('bi_write_staging_register(clause, ins);')
    elif staging == '':
        pass
    else:
        assert staging == 'rw'
        # XXX: register allocation requirement (!)
        common_body.append('bi_read_staging_register(clause, ins);')
        common_body.append('assert(ins->src[0] == ins->dest);')

    # After this, we have to branch off, since deriveds *do* vary based on state.
    state_body = [[] for _ in states]

    for idx, (_, desc) in enumerate(states):
        for ((pos, width), exprs) in desc.get("derived", []):
            pack_derived(pos, exprs, imm_map, state_body[idx], pack_exprs[idx])

    # How do we pick a state? Accumulate the conditions. A lone state is
    # emitted unconditionally, so its condition is never compiled.
    if len(states) > 1:
        state_conds = [compile_s_expr(state[0], imm_map, None) for state in states]
    else:
        state_conds = [None]

    # Finally, we'll collect everything together
    return variant_template.render(name = opname_to_c(opname), states = zip(pack_exprs, state_body, state_conds), common_body = common_body, single_state = (len(states) == 1))
# Text printed at the top of the generated output: licence comment, include
# guard opening and the includes. The matching "#endif" is printed separately
# after all pack routines.
HEADER = """/*
* Copyright (C) 2020 Collabora, Ltd.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Autogenerated file, do not edit */
#ifndef _BI_GENERATED_PACK_H
#define _BI_GENERATED_PACK_H
#include "compiler.h"
#include "bi_pack_helpers.h"
"""
# Emit the generated header on stdout: prologue, one pack routine per
# instruction, then the closing include guard.
print(HEADER)

# Generate everything before printing any routine, so a failure while
# generating does not leave a partially written list of routines behind.
packs = [pack_variant(e, instructions[e]) for e in instructions]

for opname, p in zip(instructions, packs):
    if p is None:
        # pack_variant returns None for instructions it cannot handle.
        # Printing that would put the literal text "None" into the C header,
        # so report it on stderr and leave the routine out instead.
        sys.stderr.write('warning: no pack routine generated for {}\n'.format(opname))
        continue

    print(p)

print("#endif")