blob: e61b969206887398c48da8bb55f2553f3c189b03 [file] [log] [blame]
#!/usr/bin/python3
#
# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Generate intrinsics code."""
from collections import OrderedDict
import asm_defs
import json
import os
import re
import sys
# C-level intrinsic calling convention:
# 1. All arguments are passed using the natural data types:
# - int8_t passed as one byte argument (on the stack in IA32 mode, in GP register in x86-64 mode)
# - int32_t passed as 4 bytes argument (on the stack in IA32 mode, in GP register in x86-64 mode)
# - int64_t is passed as 8 byte argument (on the stack in IA32 mode, in GP register in x86-64 mode)
# - float is passed as float (on the stack in IA32 mode, in XMM register in x86-64 mode)
# - double is passed as double (on the stack in IA32 mode, in XMM register in x86-64 mode)
# - vector formats are passed as pointers to 128bit data structure
# 2. Return values.
# - Values are returned as std::tuple. This means that on IA32 it's always returned on stack.
# Indentation unit prepended to lines of generated code.
INDENT = ' '
# Header comment printed first into every generated file.
AUTOGEN = """\
// This file automatically generated by gen_intrinsics.py
// DO NOT EDIT!
"""
class VecFormat:
  """Plain record describing one typed vector format.

  The constructor arguments are stored unchanged as attributes of the same
  name: num_elements, element_size, is_unsigned, is_float, index and c_type.
  """

  def __init__(self, num_elements, element_size, is_unsigned, is_float, index,
               c_type):
    # Shape of the vector.
    self.num_elements, self.element_size = num_elements, element_size
    # Kind of the elements.
    self.is_unsigned, self.is_float = is_unsigned, is_float
    # Ordering key and the C type name of one element.
    self.index, self.c_type = index, c_type
# Known typed vector formats, keyed by format name.
# Each VecFormat is constructed from:
# num_elements, element_size, is_unsigned, is_float, index, c_type
# TODO(olonho): make flat numbering after removing legacy macro compat.
_VECTOR_FORMATS = {
'U8x8': VecFormat(8, 1, True, False, 1, 'uint8_t'),
'U16x4': VecFormat(4, 2, True, False, 2, 'uint16_t'),
'U32x2': VecFormat(2, 4, True, False, 3, 'uint32_t'),
'U64x1': VecFormat(1, 8, True, False, 4, 'uint64_t'),
'U8x16': VecFormat(16, 1, True, False, 5, 'uint8_t'),
'U16x8': VecFormat(8, 2, True, False, 6, 'uint16_t'),
'U32x4': VecFormat(4, 4, True, False, 7, 'uint32_t'),
'U64x2': VecFormat(2, 8, True, False, 8, 'uint64_t'),
'I8x8': VecFormat(8, 1, False, False, 9, 'int8_t'),
'I16x4': VecFormat(4, 2, False, False, 10, 'int16_t'),
'I32x2': VecFormat(2, 4, False, False, 11, 'int32_t'),
'I64x1': VecFormat(1, 8, False, False, 12, 'int64_t'),
'I8x16': VecFormat(16, 1, False, False, 13, 'int8_t'),
'I16x8': VecFormat(8, 2, False, False, 14, 'int16_t'),
'I32x4': VecFormat(4, 4, False, False, 15, 'int32_t'),
'I64x2': VecFormat(2, 8, False, False, 16, 'int64_t'),
'U8x1': VecFormat(1, 1, True, False, 17, 'uint8_t'),
'I8x1': VecFormat(1, 1, False, False, 18, 'int8_t'),
'U16x1': VecFormat(1, 2, True, False, 19, 'uint16_t'),
'I16x1': VecFormat(1, 2, False, False, 20, 'int16_t'),
'U32x1': VecFormat(1, 4, True, False, 21, 'uint32_t'),
'I32x1': VecFormat(1, 4, False, False, 22, 'int32_t'),
# These floating-point formats can never intersect with the integer formats
# above, so they can reuse index values.
'F32x1': VecFormat(1, 4, False, True, 1, 'Float32'),
'F32x2': VecFormat(2, 4, False, True, 2, 'Float32'),
'F32x4': VecFormat(4, 4, False, True, 3, 'Float32'),
'F64x1': VecFormat(1, 8, False, True, 4, 'Float64'),
'F64x2': VecFormat(2, 8, False, True, 5, 'Float64'),
# These small (4-byte total) formats can never intersect with the formats
# above, so they can reuse index values.
'U8x4': VecFormat(4, 1, True, False, 1, 'uint8_t'),
'U16x2': VecFormat(2, 2, True, False, 2, 'uint16_t'),
'I8x4': VecFormat(4, 1, False, False, 3, 'int8_t'),
'I16x2': VecFormat(2, 2, False, False, 4, 'int16_t'),
}
class VecSize:
  """Plain record describing one raw (untyped) vector size.

  Stores num_elements and index exactly as passed to the constructor.
  """

  def __init__(self, num_elements, index):
    self.num_elements, self.index = num_elements, index
# Sizes offered to intrinsics that have a 'raw' variant; num_elements is what
# gets compared against the register class size.
_VECTOR_SIZES = {'X8': VecSize(8, 1), 'X16': VecSize(16, 2)}
def _is_imm_type(arg_type):
  """Return True when the type name contains 'imm' (e.g. 'imm8', 'uimm32')."""
  is_immediate = 'imm' in arg_type
  return is_immediate
def _is_template_type(arg_type):
  """Return True if arg_type names a template type parameter ('Type<N>').

  Args:
    arg_type: type name string taken from an intrinsic definition.

  Returns:
    False if the name does not start with 'Type', True otherwise.

  Raises:
    ValueError: if the name starts with 'Type' but the remainder cannot be
      parsed as an integer.
  """
  if not arg_type.startswith('Type'):
    return False
  # Parse the suffix outside of an assert statement: the int() call is the
  # actual validation and must not disappear when Python runs with -O.
  int(arg_type[4:])
  return True
def _get_imm_c_type(arg_type):
  """Map an immediate type name to its C type.

  Raises KeyError for names other than 'imm8', 'uimm8' and 'uimm32'.
  """
  c_type_by_imm = {
      'imm8' : 'int8_t',
      'uimm8' : 'uint8_t',
      'uimm32' : 'uint32_t',
  }
  return c_type_by_imm[arg_type]
def _get_c_type(arg_type):
  """Translate an intrinsic argument type name into the C type to emit.

  Native C types and template parameters ('Type<N>') are returned unchanged,
  flag-like types and 'vec32' become 'uint32_t', immediates are resolved via
  _get_imm_c_type and 'vec' becomes 'SIMD128Register'.

  Raises:
    Exception: if the type name is not recognised.
  """
  native_types = ('Float32', 'Float64', 'int8_t', 'uint8_t', 'int16_t',
                  'uint16_t', 'int32_t', 'uint32_t', 'int64_t', 'uint64_t',
                  'volatile uint8_t*', 'volatile uint32_t*')
  uint32_aliases = ('fp_flags', 'fp_control', 'int', 'flag', 'flags', 'vec32')
  if arg_type in native_types or _is_template_type(arg_type):
    c_type = arg_type
  elif arg_type in uint32_aliases:
    c_type = 'uint32_t'
  elif _is_imm_type(arg_type):
    c_type = _get_imm_c_type(arg_type)
  elif arg_type == 'vec':
    c_type = 'SIMD128Register'
  else:
    raise Exception('Type %s not supported' % (arg_type))
  return c_type
def _get_semantic_player_type(arg_type, type_map):
  """Return the semantics-player type used for an intrinsic operand type.

  An entry in type_map (when a map is given) takes precedence. Otherwise
  'Float32', 'Float64' and 'vec' map to 'SimdRegister', immediates map to
  their C type and everything else maps to 'Register'.
  """
  if type_map is not None:
    if arg_type in type_map:
      return type_map[arg_type]
  if arg_type in ('Float32', 'Float64', 'vec'):
    return 'SimdRegister'
  return _get_imm_c_type(arg_type) if _is_imm_type(arg_type) else 'Register'
def _gen_scalar_intr_decl(f, name, intr):
  """Print the C++ declaration of a scalar intrinsic to file object f.

  The result is declared as a std::tuple of the 'out' types, or void when there
  are none. An optional 'comment' is printed first, and intrinsics marked with
  'precise_nans' get a template header.
  """
  arg_types = [_get_c_type(op) for op in intr.get('in')]
  out_types = [_get_c_type(out) for out in intr.get('out')]
  if out_types:
    result_type = 'std::tuple<' + ', '.join(out_types) + '>'
  else:
    result_type = 'void'
  comment = intr.get('comment')
  if comment:
    print('// %s.' % (comment), file=f)
  if intr.get('precise_nans', False):
    print('template <bool precise_nan_operations_handling, '
          'enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible>',
          file=f)
  print('%s %s(%s);' % (result_type, name, ', '.join(arg_types)), file=f)
def _gen_template_intr_decl(f, name, intr):
  """Print the C++ declaration of a template intrinsic to file object f.

  Same layout as the scalar declaration, except that the template header is
  always printed and is derived from the intrinsic's 'variants'.
  """
  arg_types = [_get_c_type(op) for op in intr.get('in')]
  out_types = [_get_c_type(out) for out in intr.get('out')]
  if out_types:
    result_type = 'std::tuple<' + ', '.join(out_types) + '>'
  else:
    result_type = 'void'
  comment = intr.get('comment')
  if comment:
    print('// %s.' % (comment), file=f)
  template_args = _get_template_arguments(intr.get('variants'))
  print('template <%s>' % template_args, file=f)
  print('%s %s(%s);' % (result_type, name, ', '.join(arg_types)), file=f)
def _get_template_arguments(variants,
                            extra = ['enum PreferredIntrinsicsImplementation = kUseAssemblerImplementationIfPossible']):
  """Build the C++ template parameter list shared by all variants.

  Every comma-separated item of a variant becomes one parameter, numbered by
  its position: 'true'/'false' -> 'bool kBool<N>', anything containing a
  letter or underscore -> 'typename Type<N>', everything else -> 'int kInt<N>'.
  The entries of extra are appended (the default list is only read, never
  modified).

  All variants must produce the same list (checked with an assert). Returns
  None when variants is empty.
  """
  def declare(position, param):
    if param.strip() in ('true', 'false'):
      return 'bool kBool%s' % position
    if re.search('[_a-zA-Z]', param):
      return 'typename Type%d' % position
    return 'int kInt%s' % position

  template = None
  for variant in variants:
    declarations = [declare(position, param)
                    for position, param in enumerate(variant.split(','))]
    new_template = ', '.join(declarations + extra)
    assert template is None or template == new_template
    template = new_template
  return template
def _is_vector_class(intr):
  """Return True if the intrinsic's 'class' is one of the vector classes."""
  vector_classes = ('vector_4', 'vector_8', 'vector_16',
                    'vector_8/16', 'vector_8/16/single',
                    'vector_8/single', 'vector_16/single')
  reg_class = intr.get('class')
  return reg_class in vector_classes
def _is_simd128_conversion_required(t, type_map=None):
  """Tell whether a value of type t lives in a SimdRegister but is not SIMD128.

  True when the semantics player handles t as 'SimdRegister' while its C type
  is something other than 'SIMD128Register'.
  """
  if _get_semantic_player_type(t, type_map) != 'SimdRegister':
    return False
  return _get_c_type(t) != 'SIMD128Register'
def _get_semantics_player_hook_result(intr):
  """Return the result type of the semantics-player hook for an intrinsic.

  'void' for no outputs, the bare player type for exactly one output (no tuple
  wrapper), and a std::tuple of player types otherwise.
  """
  outs = intr['out']
  if len(outs) == 0:
    return 'void'
  type_map = intr.get('sem-player-types')
  player_types = [_get_semantic_player_type(out, type_map) for out in outs]
  if len(player_types) == 1:
    # No tuple for single result.
    return player_types[0]
  return 'std::tuple<' + ', '.join(player_types) + '>'
def _get_semantics_player_hook_proto_components(name, intr):
  """Return (result type, name, argument list) of a semantics-player hook.

  Vector intrinsics get leading arguments: a single size for the 'raw'
  variant, otherwise element size and count plus an is_signed flag when the
  intrinsic has both signed and unsigned variants. The operands follow as
  arg0, arg1, ... with their semantics-player types.
  """
  ins = intr['in']
  leading_args = []
  if _is_vector_class(intr):
    if 'raw' in intr['variants']:
      assert len(intr['variants']) == 1, "Unexpected length of variants"
      leading_args.append("uint8_t size")
    else:
      leading_args.extend(["uint8_t elem_size", "uint8_t elem_num"])
      if _is_signed(intr) and _is_unsigned(intr):
        leading_args.append('bool is_signed')
  operand_args = [
      '%s arg%d' % (
          _get_semantic_player_type(op, intr.get('sem-player-types')), num)
      for num, op in enumerate(ins)
  ]
  result = _get_semantics_player_hook_result(intr)
  return result, name, ', '.join(leading_args + operand_args)
def _get_semantics_player_hook_proto(name, intr):
  """Return the C++ prototype of a semantics-player hook (without body).

  Template-class intrinsics get a 'template<...>' line in front, built from the
  variants with no extra template parameters.
  """
  result, name, args = _get_semantics_player_hook_proto_components(name, intr)
  if intr.get('class') != 'template':
    return '%s %s(%s)' % (result, name, args)
  template_args = _get_template_arguments(intr.get('variants'), [])
  return 'template<%s>\n%s %s(%s)' % (template_args, result, name, args)
def _get_interpreter_hook_call_expr(name, intr, desc=None):
  """Build the C++ expression with which the interpreter calls an intrinsic.

  Each hook argument is converted from its semantics-player representation to
  the C type the intrinsic expects; the result is converted back the other
  way.

  Raises:
    Exception: when a result would need a SimdRegister conversion that is not
      supported (non-float single result, or any of multiple results).
  """
  ins = intr['in']
  outs = intr['out']
  type_map = intr.get('sem-player-types')

  converted_args = []
  for index, op in enumerate(ins):
    arg_name = 'arg%d' % (index)
    player_type = _get_semantic_player_type(op, type_map)
    if player_type == 'FpRegister':
      converted = 'FPRegToFloat<%s>(%s)' % (op, arg_name)
    elif player_type == 'SimdRegister':
      converted = _get_cast_from_simd128(arg_name, op, ptr_bits=64)
    elif '*' in _get_c_type(op):
      converted = 'bit_cast<%s>(%s)' % (_get_c_type(op), arg_name)
    else:
      converted = arg_name
    converted_args.append(converted)

  specializations = _get_desc_specializations(intr, desc).replace(
      'Float', 'intrinsics::Float')
  call_expr = 'intrinsics::%s%s(%s)' % (
      name, specializations, ', '.join(converted_args))

  if 'sem-player-types' in intr:
    # Intrinsics with an explicit type map have exactly one result.
    assert len(outs) == 1
    out_type = _get_semantic_player_type(outs[0], type_map)
    if out_type == "FpRegister":
      return 'FloatToFPReg(std::get<0>(%s))' % call_expr
    assert out_type == "Register"
    assert not _is_simd128_conversion_required(outs[0], type_map)
    return 'std::make_signed_t<%s>(std::get<0>(%s))' % (outs[0], call_expr)

  if len(outs) != 1:
    if any(_is_simd128_conversion_required(out) for out in outs):
      raise Exception(
          'Unsupported SIMD128Register conversion with multiple results')
    return call_expr

  # Unwrap tuple for single result.
  call_expr = 'std::get<0>(%s)' % call_expr
  # Currently this kind of mismatch can only happen for single result, so we
  # can keep simple code here for now.
  if _is_simd128_conversion_required(outs[0]):
    out_type = _get_c_type(outs[0])
    if out_type not in ('Float32', 'Float64'):
      raise Exception('Type %s is not supported' % (out_type))
    call_expr = 'SimdRegister(%s)' % call_expr
  return call_expr
def _get_interpreter_hook_return_stmt(name, intr, desc=None):
  """Wrap the interpreter call expression into a C++ return statement."""
  call_expr = _get_interpreter_hook_call_expr(name, intr, desc)
  return 'return ' + call_expr + ';'
def _get_semantics_player_hook_raw_vector_body(name, intr, get_return_stmt):
  """Yield the lines of a 'switch (size)' body for a raw vector intrinsic.

  One case is produced per entry of _VECTOR_SIZES that fits the intrinsic's
  register class; get_return_stmt(name, intr, desc) supplies each case body.

  Raises:
    Exception: (on first iteration) if the intrinsic has no results.
  """
  if len(intr['out']) == 0:
    raise Exception('No result raw vector intrinsic is not supported')
  reg_class = intr.get('class')
  yield 'switch (size) {'
  for desc in _VECTOR_SIZES.values():
    if not _check_reg_class_size(reg_class, desc.num_elements):
      continue
    yield INDENT + 'case %s:' % desc.num_elements
    yield 2 * INDENT + get_return_stmt(name, intr, desc)
  yield INDENT + 'default:'
  for fallback_line in ('LOG_ALWAYS_FATAL("Unsupported size");', 'return {};'):
    yield 2 * INDENT + fallback_line
  yield '}'
def _is_signed(intr):
  """Return True if any variant name of the intrinsic starts with 'signed'."""
  for variant in intr['variants']:
    if variant.startswith("signed"):
      return True
  return False
def _is_unsigned(intr):
  """Return True if any variant name of the intrinsic starts with 'unsigned'."""
  for variant in intr['variants']:
    if variant.startswith("unsigned"):
      return True
  return False
def _get_vector_format_init_expr(intr):
  """Return the C++ expression that computes the vector format at runtime.

  Floating-point intrinsics use GetVectorFormatFP. Integer intrinsics use
  GetVectorFormatInt, whose signedness argument is the runtime 'is_signed'
  flag when both signed and unsigned variants exist and a constant otherwise.
  """
  variants = intr.get('variants')
  if 'Float32' in variants or 'Float64' in variants:
    return 'intrinsics::GetVectorFormatFP(elem_size, elem_num)'
  has_signed = _is_signed(intr)
  has_unsigned = _is_unsigned(intr)
  assert has_signed or has_unsigned, "Unexpected intrinsic class"
  if has_signed and has_unsigned:
    signed_arg = ', is_signed'
  elif has_signed:
    signed_arg = ', true'
  else:
    signed_arg = ', false'
  return 'intrinsics::GetVectorFormatInt(elem_size, elem_num%s)' % signed_arg
def _get_semantics_player_hook_vector_body(name, intr, get_return_stmt):
  """Yield the lines of a 'switch (format)' body for a typed vector intrinsic.

  For every variant, each entry of _VECTOR_FORMATS produces a case when it
  matches the variant and either fits the register class size, or is a
  single-element format and the class is one of the '.../single' classes.
  get_return_stmt(name, intr, desc) supplies each case body.

  Raises:
    Exception: (on first iteration) if the intrinsic has no results.
  """
  if len(intr['out']) == 0:
    raise Exception('No result vector intrinsic is not supported')
  reg_class = intr.get('class')
  single_classes = ('vector_8/single', 'vector_8/16/single', 'vector_16/single')
  yield 'auto format = %s;' % _get_vector_format_init_expr(intr)
  yield 'switch (format) {'
  for variant in intr.get('variants'):
    for fmt, desc in _VECTOR_FORMATS.items():
      total_size = desc.element_size * desc.num_elements
      if (_check_reg_class_size(reg_class, total_size) and
          _check_typed_variant(variant, desc)):
        pass
      elif (reg_class in single_classes and
            desc.num_elements == 1 and
            _check_typed_variant(variant, desc)):
        assert desc.element_size <= 8, "Unexpected element size"
      else:
        continue
      yield INDENT + 'case intrinsics::kVector%s:' % fmt
      yield 2 * INDENT + get_return_stmt(name, intr, desc)
  yield INDENT + 'default:'
  for fallback_line in ('LOG_ALWAYS_FATAL("Unsupported format");',
                        'return {};'):
    yield 2 * INDENT + fallback_line
  yield '}'
# Syntax sugar heavily used in tests.
def _get_interpreter_hook_vector_body(name, intr):
  """Return the typed-vector switch body using interpreter return statements."""
  make_return_stmt = _get_interpreter_hook_return_stmt
  return _get_semantics_player_hook_vector_body(name, intr, make_return_stmt)
def _gen_interpreter_hook(f, name, intr):
  """Print the complete interpreter hook (prototype and body) to f.

  Vector intrinsics get a switch over size ('raw' variant) or over format;
  all others get a single return statement.
  """
  print('%s {' % (_get_semantics_player_hook_proto(name, intr)), file=f)
  if not _is_vector_class(intr):
    print(INDENT + _get_interpreter_hook_return_stmt(name, intr), file=f)
  else:
    if 'raw' in intr['variants']:
      assert len(intr['variants']) == 1, "Unexpected length of variants"
      body = _get_semantics_player_hook_raw_vector_body(
          name, intr, _get_interpreter_hook_return_stmt)
    else:
      body = _get_interpreter_hook_vector_body(name, intr)
    indented = [INDENT + line for line in body]
    print('\n'.join(indented), file=f)
  print('}\n', file=f)
def _get_translator_hook_call_expr(name, intr, desc = None):
  """Build the 'CallIntrinsic<...>(...)' expression used by translator hooks.

  The template arguments are a pointer to the (specialized) intrinsic and the
  hook's result type; the call arguments are arg0, arg1, ... passed as is.
  """
  specializations = _get_desc_specializations(intr, desc).replace(
      'Float', 'intrinsics::Float')
  call_args = ['arg%d' % n for n, _ in enumerate(intr['in'])]
  intrinsic_ref = '&intrinsics::' + name + specializations
  result_type = _get_semantics_player_hook_result(intr)
  return 'CallIntrinsic<%s>(%s)' % (
      ', '.join([intrinsic_ref, result_type]), ', '.join(call_args))
def _get_translator_hook_return_stmt(name, intr, desc=None):
  """Wrap the translator call expression into a C++ return statement."""
  call_expr = _get_translator_hook_call_expr(name, intr, desc)
  return 'return ' + call_expr + ';'
def _gen_translator_hook(f, name, intr):
  """Print the complete translator hook (prototype and body) to f.

  Vector intrinsics get a switch over size ('raw' variant) or over format;
  all others get a single return statement.
  """
  print('%s {' % (_get_semantics_player_hook_proto(name, intr)), file=f)
  if not _is_vector_class(intr):
    print(INDENT + _get_translator_hook_return_stmt(name, intr), file=f)
  else:
    if 'raw' in intr['variants']:
      assert len(intr['variants']) == 1, "Unexpected length of variants"
      make_body = _get_semantics_player_hook_raw_vector_body
    else:
      make_body = _get_semantics_player_hook_vector_body
    body = make_body(name, intr, _get_translator_hook_return_stmt)
    indented = [INDENT + line for line in body]
    print('\n'.join(indented), file=f)
  print('}\n', file=f)
def _gen_mock_semantics_listener_hook(f, name, intr):
  """Print a MOCK_METHOD declaration matching the hook's prototype to f."""
  components = _get_semantics_player_hook_proto_components(name, intr)
  print('MOCK_METHOD((%s), %s, (%s));' % components, file=f)
def _check_signed_variant(variant, desc):
  """Tell whether a signed variant name covers the format's element size.

  Plain 'signed' covers every format. 'signed_<sizes>' covers the listed
  element sizes (given in bits in the name, compared against
  desc.element_size). Any other name is not covered.
  """
  if variant == 'signed':
    return True
  sizes_by_variant = {
      'signed_32': (4,),
      'signed_64': (8,),
      'signed_16/32': (2, 4),
      'signed_8/16/32': (1, 2, 4),
      'signed_16/32/64': (2, 4, 8),
      'signed_8/16/32/64': (1, 2, 4, 8),
      'signed_32/64': (4, 8),
  }
  allowed_sizes = sizes_by_variant.get(variant)
  if allowed_sizes is None:
    return False
  return desc.element_size in allowed_sizes
def _check_unsigned_variant(variant, desc):
  """Tell whether an unsigned variant name covers the format's element size.

  Plain 'unsigned' covers every format. 'unsigned_<sizes>' covers the listed
  element sizes (given in bits in the name, compared against
  desc.element_size). Any other name is not covered.
  """
  if variant == 'unsigned':
    return True
  sizes_by_variant = {
      'unsigned_8': (1,),
      'unsigned_16': (2,),
      'unsigned_32': (4,),
      'unsigned_64': (8,),
      'unsigned_8/16': (1, 2),
      'unsigned_8/16/32': (1, 2, 4),
      'unsigned_16/32/64': (2, 4, 8),
      'unsigned_8/16/32/64': (1, 2, 4, 8),
      'unsigned_32/64': (4, 8),
  }
  allowed_sizes = sizes_by_variant.get(variant)
  if allowed_sizes is None:
    return False
  return desc.element_size in allowed_sizes
def _check_reg_class_size(reg_class, size):
  """Tell whether a vector of the given total size belongs to reg_class."""
  classes_by_size = {
      # Small vectors are separate namespace.
      4: ('vector_4',),
      8: ('vector_8', 'vector_8/16', 'vector_8/16/single', 'vector_8/single'),
      16: ('vector_16', 'vector_8/16', 'vector_8/16/single',
           'vector_16/single'),
  }
  return reg_class in classes_by_size.get(size, ())
def _check_typed_variant(variant, desc):
  """Tell whether a variant name applies to the given vector format.

  Integer formats are delegated to the signed/unsigned checks; floating-point
  formats match only 'Float32' (element size 4) or 'Float64' (element size 8).
  """
  if not desc.is_float:
    if desc.is_unsigned:
      return _check_unsigned_variant(variant, desc)
    return _check_signed_variant(variant, desc)
  expected_variant = {4: 'Float32', 8: 'Float64'}.get(desc.element_size)
  if expected_variant is None:
    return False
  return variant == expected_variant
def _get_formats_with_descriptions(intr):
  """Yield (format name, description) pairs an intrinsic expands to.

  For each variant this yields the matching entries of _VECTOR_FORMATS (for
  'vector_4' only those with elements smaller than 4) and, for the 'raw'
  variant, the matching entries of _VECTOR_SIZES. A variant that matches
  nothing trips an assertion.
  """
  reg_class = intr.get('class')
  for variant in intr.get('variants'):
    matches = [
        (fmt, desc) for fmt, desc in _VECTOR_FORMATS.items()
        if (_check_reg_class_size(reg_class,
                                  desc.element_size * desc.num_elements) and
            _check_typed_variant(variant, desc) and
            (reg_class != 'vector_4' or desc.element_size < 4))]
    if variant == 'raw':
      matches.extend(
          (fmt, desc) for fmt, desc in _VECTOR_SIZES.items()
          if _check_reg_class_size(reg_class, desc.num_elements))
    assert matches, 'Couldn\'t expand %s' % reg_class
    yield from matches
def _get_result_type(outs):
  """Return (C++ result type, return-statement prefix) for the given outputs.

  No outputs give ('void', ''); otherwise a std::tuple of the C types and the
  prefix 'return '.
  """
  if len(outs) < 1:
    return 'void', ''
  c_types = ', '.join(_get_c_type(out) for out in outs)
  return 'std::tuple<' + c_types + '>', 'return '
def _get_in_params(params):
  """Yield (C type, 'in<N>') for every input parameter, in order."""
  yield from ((_get_c_type(param), 'in%d' % (position))
              for position, param in enumerate(params))
def _get_out_params(params):
  """Yield (C type, 'out<N>') for every output parameter, in order."""
  yield from ((_get_c_type(param), 'out%d' % (position))
              for position, param in enumerate(params))
def _get_cast_from_simd128(var, target_type, ptr_bits):
  """Return a C++ expression extracting target_type from SIMD variable var.

  Pointer types are read as an unsigned integer of ptr_bits bits and bit_cast
  to the pointer type. Scalar types use '.Get<T>(0)', and 'SIMD128Register'
  needs no accessor at all. A C type outside that set raises KeyError.
  """
  if '*' in target_type:
    pointer_type = _get_c_type(target_type)
    return 'bit_cast<%s>(%s.Get<uint%d_t>(0))' % (pointer_type, var, ptr_bits)
  accessor_by_c_type = {
      'Float32': '.Get<intrinsics::Float32>(0)',
      'Float64': '.Get<intrinsics::Float64>(0)',
      'int8_t': '.Get<int8_t>(0)',
      'uint8_t': '.Get<uint8_t>(0)',
      'int16_t': '.Get<int16_t>(0)',
      'uint16_t': '.Get<uint16_t>(0)',
      'int32_t': '.Get<int32_t>(0)',
      'uint32_t': '.Get<uint32_t>(0)',
      'int64_t': '.Get<int64_t>(0)',
      'uint64_t': '.Get<uint64_t>(0)',
      'SIMD128Register': ''
  }
  accessor = accessor_by_c_type[_get_c_type(target_type)]
  return '%s%s' % (var, accessor)
def _get_desc_specializations(intr, desc=None):
  """Return the '<...>' template specialization suffix for an intrinsic call.

  Template-class intrinsics use their template parameter names. Otherwise a
  typed format description contributes 'c_type, num_elements' and a size-only
  description contributes 'num_elements'. 'precise_nans' intrinsics get
  'config::kPreciseNaNOperationsHandling' in front. An empty string is
  returned when there is nothing to specialize.
  """
  if intr.get('class') == 'template':
    spec = _get_template_spec_arguments(intr.get('variants'))
  elif hasattr(desc, 'c_type'):
    spec = [desc.c_type, str(desc.num_elements)]
  elif hasattr(desc, 'num_elements'):
    spec = [str(desc.num_elements)]
  else:
    spec = []
  if intr.get('precise_nans', False):
    spec = ['config::kPreciseNaNOperationsHandling'] + spec
  if len(spec) == 0:
    return ''
  return '<%s>' % ', '.join(spec)
def _get_template_spec_arguments(variants):
  """Return the list of template parameter names shared by all variants.

  Every comma-separated item of a variant yields one name, numbered by its
  position: 'true'/'false' -> 'kBool<N>', anything containing a letter or
  underscore -> 'Type<N>', everything else -> 'kInt<N>'.

  All variants must produce the same list (checked with an assert). Returns
  None when variants is empty.
  """
  def param_name(position, param):
    if param.strip() in ('true', 'false'):
      return 'kBool%s' % position
    if re.search('[_a-zA-Z]', param):
      return 'Type%d' % position
    return 'kInt%s' % position

  spec = None
  for variant in variants:
    new_spec = [param_name(position, param)
                for position, param in enumerate(variant.split(','))]
    assert spec is None or spec == new_spec
    spec = new_spec
  return spec
def _intr_has_side_effects(intr, fmt=None):
  """Tell whether an intrinsic must be treated as having side effects."""
  # If we have 'has_side_effects' mark in JSON file then we use it "as is".
  if 'has_side_effects' in intr:
    return intr.get('has_side_effects')
  # Otherwise we mark all floating-point related intrinsics as "volatile".
  # TODO(b/68857496): move that information in HIR/LIR and stop doing that.
  for key in ('in', 'out'):
    operands = intr.get(key)
    if 'Float32' in operands or 'Float64' in operands:
      return True
  return fmt is not None and fmt.startswith('F')
def _gen_intrinsics_inl_h(f, intrs):
  """Print declarations of all scalar and template intrinsics to f.

  Intrinsics of any other class produce no output here.
  """
  print(AUTOGEN, file=f)
  for name, intr in intrs:
    intr_class = intr.get('class')
    if intr_class == 'scalar':
      _gen_scalar_intr_decl(f, name, intr)
      continue
    if intr_class == 'template':
      _gen_template_intr_decl(f, name, intr)
def _gen_semantic_player_types(intrs):
  """Attach a 'sem-player-types' map to every template-class intrinsic.

  The map sends 'Float32'/'Float64' and each template type parameter
  ('Type<N>') to a semantics-player type. All variants of one intrinsic must
  produce the same map (checked with an assert). The intrinsic dictionaries
  are modified in place.
  """
  for name, intr in intrs:
    if intr.get('class') != 'template':
      continue
    type_map = None
    for variant in intr.get('variants'):
      variant_map = {
          'Float32': 'FpRegister',
          'Float64': 'FpRegister',
      }
      # Only parameters that name a type take part: booleans and plain
      # numbers are skipped.
      # NOTE(review): numbering here counts type parameters only, while
      # _get_template_arguments numbers every parameter by position; the two
      # agree only when type parameters come first - confirm against the
      # definitions in use.
      type_params = [
          param for param in variant.split(',')
          if param.strip() not in ('true', 'false') and
          re.search('[_a-zA-Z]', param)]
      for position, type_param in enumerate(type_params):
        if type_param.strip() in ('Float32', 'Float64'):
          player_type = 'FpRegister'
        else:
          player_type = _get_semantic_player_type(type_param, None)
        variant_map['Type%d' % position] = player_type
      assert type_map is None or type_map == variant_map
      type_map = variant_map
    intr['sem-player-types'] = type_map
def _gen_interpreter_intrinsics_hooks_impl_inl_h(f, intrs):
  """Print the autogen header and one interpreter hook per intrinsic to f."""
  print(AUTOGEN, file=f)
  for intr_name, intr_def in intrs:
    _gen_interpreter_hook(f, intr_name, intr_def)
def _gen_translator_intrinsics_hooks_impl_inl_h(f, intrs):
  """Print the autogen header and one translator hook per intrinsic to f."""
  print(AUTOGEN, file=f)
  for intr_name, intr_def in intrs:
    _gen_translator_hook(f, intr_name, intr_def)
def _gen_mock_semantics_listener_intrinsics_hooks_impl_inl_h(f, intrs):
  """Print the autogen header and one MOCK_METHOD per intrinsic to f."""
  print(AUTOGEN, file=f)
  for intr_name, intr_def in intrs:
    _gen_mock_semantics_listener_hook(f, intr_name, intr_def)
def _get_reg_operand_info(arg, info_prefix=None):
  """Return the C++ binding descriptor for one assembler operand.

  Args:
    arg: operand dictionary with 'class', 'usage' (not needed for 'Imm8') and,
      depending on usage, 'ir_arg' and/or 'ir_res'.
    info_prefix: optional namespace prefix for the class and usage
      descriptors; when None both are rendered as 'void'.

  Returns:
    One of the ImmArg/InArg/InTmpArg/OutArg/OutTmpArg/TmpArg/InOutArg/
    InOutTmpArg template instantiations as a string.

  Raises:
    AssertionError: if the usage is not recognised (when info_prefix is None),
      or if a 'def' operand also carries 'ir_arg'.
    KeyError: if the usage is not recognised and info_prefix is given.
  """
  # Operands pinned to these registers go through a temporary.
  need_tmp = arg['class'] in ('EAX', 'EDX', 'CL', 'ECX')
  if info_prefix is None:
    class_info = 'void'
  else:
    class_info = '%s::%s' % (info_prefix, arg['class'])
  # Immediates carry no usage information at all.
  if arg['class'] == 'Imm8':
    return 'ImmArg<%d, int8_t, %s>' % (arg['ir_arg'], class_info)
  if info_prefix is None:
    using_info = 'void'
  else:
    using_info = '%s::%s' % (info_prefix, {
        'def': 'Def',
        'def_early_clobber': 'DefEarlyClobber',
        'use': 'Use',
        'use_def': 'UseDef'
    }[arg['usage']])
  if arg['usage'] == 'use':
    if need_tmp:
      return 'InTmpArg<%d, %s, %s>' % (arg['ir_arg'], class_info, using_info)
    return 'InArg<%d, %s, %s>' % (arg['ir_arg'], class_info, using_info)
  if arg['usage'] in ('def', 'def_early_clobber'):
    assert 'ir_arg' not in arg
    if 'ir_res' in arg:
      if need_tmp:
        return 'OutTmpArg<%d, %s, %s>' % (arg['ir_res'], class_info, using_info)
      return 'OutArg<%d, %s, %s>' % (arg['ir_res'], class_info, using_info)
    # Defined but not an intrinsic result: a pure scratch operand.
    return 'TmpArg<%s, %s>' % (class_info, using_info)
  if arg['usage'] == 'use_def':
    if 'ir_res' in arg:
      if need_tmp:
        return 'InOutTmpArg<%s, %s, %s, %s>' % (arg['ir_arg'], arg['ir_res'],
                                                class_info, using_info)
      return 'InOutArg<%s, %s, %s, %s>' % (arg['ir_arg'], arg['ir_res'],
                                           class_info, using_info)
    return 'InTmpArg<%s, %s, %s>' % (arg['ir_arg'], class_info, using_info)
  # Raised explicitly (instead of 'assert False') so that an unknown usage is
  # still reported when Python runs with -O.
  raise AssertionError('unknown operand usage %s' % (arg['usage']))
def _gen_make_intrinsics(f, intrs, archs):
  """Print the ProcessAllBindings function template to f.

  The template takes one Assembler_<arch> type parameter per entry of archs;
  its body consists of the bindings generated for interpreter-compatible
  assembler implementations.
  """
  assembler_params = ',\n '.join(
      ['typename Assembler_%s' % arch for arch in archs])
  print("""%s
template <%s,
typename MacroAssembler,
typename Callback,
typename... Args>
void ProcessAllBindings(Callback callback, Args&&... args) {""" % (
      AUTOGEN, assembler_params),
        file=f)
  bindings = _gen_c_intrinsics_generator(
      intrs, _is_interpreter_compatible_assembler, False)  # False for gen_builder
  for binding_line in bindings:
    print(binding_line, file=f)
  print('}', file=f)
def _gen_opcode_generators_f(f, intrs):
  """Print every generated GetOpcode class for the given intrinsics to f."""
  for generated in _gen_opcode_generators(intrs):
    print(generated, file=f)
def _gen_opcode_generators(intrs):
  """Yield GetOpcode class definitions for all assembler implementations.

  Intrinsics without 'asm' are skipped. For intrinsics with variants the
  implementations are visited per expanded format, ordered by format index;
  each opcode class is produced only once.
  """
  emitted = {}
  for name, intr in intrs:
    if 'asm' not in intr:
      continue
    if 'variants' not in intr:
      for intr_asm in _gen_sorted_asms(intr):
        yield from _gen_opcode_generator(intr_asm, emitted)
      continue
    formats = sorted(_get_formats_with_descriptions(intr),
                     key=lambda item: item[1].index)
    # Collect intr_asms for all variants of intrinsic.
    # Note: not all variants are guaranteed to have an asm variant!
    # If that happens the list of intr_asms for that variant will be empty.
    asms_per_format = [
        [intr_asm for intr_asm in _gen_sorted_asms(intr)
         if fmt in intr_asm['variants']]
        for fmt, _ in formats]
    # Print intrinsic generator
    for intr_asms in asms_per_format:
      for intr_asm in intr_asms:
        yield from _gen_opcode_generator(intr_asm, emitted)
def _gen_opcode_generator(asm, opcode_generators):
  """Yield the GetOpcode<name> class for asm unless it was produced before.

  opcode_generators records the names already handled and is updated in
  place.
  """
  opcode_name = asm['name']
  if opcode_name in opcode_generators:
    return
  opcode_generators[opcode_name] = True
  yield """
// TODO(b/260725458): Pass lambda as template argument after C++20 becomes available.
class GetOpcode%s {
public:
template <typename Opcode>
constexpr auto operator()() {
return Opcode::kMachineOp%s;
}
};""" % (opcode_name, opcode_name)
def _gen_process_bindings(f, intrs, archs):
  """Print the opcode classes and the ProcessBindings function template to f.

  The template takes one Assembler_<arch> type parameter per entry of archs;
  its body consists of the bindings generated for translator-compatible
  assembler implementations, followed by the default-result return.
  """
  print('%s' % AUTOGEN, file=f)
  _gen_opcode_generators_f(f, intrs)
  assembler_params = ',\n '.join(
      ['typename Assembler_%s' % arch for arch in archs])
  print("""
template <auto kFunc,
%s,
typename MacroAssembler,
typename Result,
typename Callback,
typename... Args>
Result ProcessBindings(Callback callback, Result def_result, Args&&... args) {""" % (
      assembler_params),
        file=f)
  bindings = _gen_c_intrinsics_generator(
      intrs, _is_translator_compatible_assembler, True)  # True for gen_builder
  for binding_line in bindings:
    print(binding_line, file=f)
  print(""" }
return std::forward<Result>(def_result);
}""", file=f)
def _gen_c_intrinsics_generator(intrs, check_compatible_assembler, gen_builder):
  """Yield the binding lines for every assembler implementation.

  Intrinsics without 'asm' produce nothing. Intrinsics with variants are
  expanded per format (ordered by format index) and named 'name<spec>', where
  spec is the element count for 'raw' intrinsics and 'c_type, count'
  otherwise.
  """
  string_labels = {}
  mnemo_idx = [0]

  def emit(label, intr, intr_asm):
    return _gen_c_intrinsic(label,
                            intr,
                            intr_asm,
                            string_labels,
                            mnemo_idx,
                            check_compatible_assembler,
                            gen_builder)

  for name, intr in intrs:
    # These two values are not used below, but computing them still runs
    # _get_c_type over every operand, which raises for unsupported types.
    formal_args = ', '.join(
        '%s %s' % (c_type, param)
        for c_type, param in _get_in_params(intr.get('in')))
    result_type, _ = _get_result_type(intr.get('out'))
    if 'asm' not in intr:
      continue
    if 'variants' not in intr:
      for intr_asm in _gen_sorted_asms(intr):
        yield from emit(name, intr, intr_asm)
      continue
    # Sort by index, to keep order close to what _gen_intrs_enum produces.
    formats = sorted(_get_formats_with_descriptions(intr),
                     key=lambda item: item[1].index)
    # Collect intr_asms for all versions of intrinsic.
    # Note: not all variants are guaranteed to have asm version!
    # If that happens list of intr_asms for that variant would be empty.
    asms_per_desc = [
        (desc, [intr_asm for intr_asm in _gen_sorted_asms(intr)
                if fmt in intr_asm['variants']])
        for fmt, desc in formats]
    # Print intrinsic generator
    for desc, intr_asms in asms_per_desc:
      if len(intr_asms) == 0:
        continue
      if 'raw' in intr['variants']:
        spec = '%d' % (desc.num_elements)
      else:
        spec = '%s, %d' % (desc.c_type, desc.num_elements)
      for intr_asm in intr_asms:
        yield from emit('%s<%s>' % (name, spec), intr, intr_asm)
def _gen_sorted_asms(intr):
  """Return the intrinsic's asm implementations in descending priority order.

  The sort key is the 'nan' value followed by the feature's entry in
  _KNOWN_FEATURES_KEYS (or the feature name itself when it is not listed);
  larger keys come first.
  """
  def priority(intr_asm):
    feature = intr_asm.get('feature', '')
    return intr_asm.get('nan', '') + _KNOWN_FEATURES_KEYS.get(feature, feature)

  return sorted(intr['asm'], key=priority, reverse=True)
_KNOWN_FEATURES_KEYS = {
'LZCNT': '001',
'BMI': '002',
'BMI2': '003',
'SSE': '010',
'SSE2': '011',
'SSE3': '012',
'SSSE3': '013',
'SSE4a': '014',
'SSE4_1': '015',
'SSE4_2': '016',
'AVX': '017',
'AVX2': '018',
'FMA': '019',
'FMA4': '020'
}
def _gen_c_intrinsic(name,
                     intr,
                     asm,
                     string_labels,
                     mnemo_idx,
                     check_compatible_assembler,
                     gen_builder):
  """Yield the C++ lines binding one assembler implementation to an intrinsic.

  Args:
    name: intrinsic name, possibly already carrying '<...>' specialization.
    intr: intrinsic definition ('in', 'out', optional side-effect info).
    asm: assembler implementation description ('mnemo', 'args', optional
      'feature' and 'nan').
    string_labels: map from intrinsic name to its kName<N> label; updated in
      place when a name is seen for the first time.
    mnemo_idx: one-element list holding the next kMnemo<N> index; updated in
      place.
    check_compatible_assembler: predicate deciding whether asm is emitted at
      all; when it is _is_translator_compatible_assembler the translator
      flavour of the code (if constexpr chain, early return) is produced.
    gen_builder: whether a builder (GetOpcode class) reference is emitted.

  Yields:
    Lines of generated C++ code; nothing if asm is not compatible.
  """
  if not check_compatible_assembler(asm):
    return
  # The same comparison selects the translator flavour in three places.
  for_translator = (
      check_compatible_assembler == _is_translator_compatible_assembler)

  cpuid_restriction = 'intrinsics::bindings::kNoCPUIDRestriction'
  if 'feature' in asm:
    if asm['feature'] == 'AuthenticAMD':
      cpuid_restriction = 'intrinsics::bindings::kIsAuthenticAMD'
    else:
      cpuid_restriction = 'intrinsics::bindings::kHas%s' % asm['feature']

  nan_restriction = 'intrinsics::bindings::kNoNansOperation'
  if 'nan' in asm:
    nan_restriction = 'intrinsics::bindings::k%sNanOperationsHandling' % asm['nan']
    # NaN-aware implementations bind to the intrinsic specialized on the
    # precise-NaN flag, which becomes the first template argument.
    template_arg = 'true' if asm['nan'] == "Precise" else "false"
    if '<' in name:
      template_pos = name.index('<')
      name = name[0:template_pos+1] + template_arg + ", " + name[template_pos+1:]
    else:
      name += '<' + template_arg + '>'

  if name not in string_labels:
    name_label = 'kName%d' % len(string_labels)
    string_labels[name] = name_label
    if for_translator:
      # Every new intrinsic opens a new branch of the if-constexpr chain.
      yield ' %s if constexpr (std::is_same_v<FunctionCompareTag<kFunc>,' % (
          '' if name_label == 'kName0' else ' } else'
      )
      yield ' FunctionCompareTag<%s>>) {' % name
    yield ' static constexpr const char %s[] = "%s";' % (
        name_label, name)
  else:
    name_label = string_labels[name]

  mnemo = asm['mnemo']
  mnemo_label = 'kMnemo%d' % mnemo_idx[0]
  mnemo_idx[0] += 1
  yield ' static constexpr const char %s[] = "%s";' % (
      mnemo_label, mnemo)

  if for_translator:
    yield ' if (auto result = callback('
  else:
    yield ' callback('
  yield ' intrinsics::bindings::AsmCallInfo<'
  yield ' %s>(),' % (
      ',\n '.join(
          [name_label,
           _get_asm_reference(asm, gen_builder),
           mnemo_label,
           _get_builder_reference(intr, asm) if gen_builder else 'void',
           cpuid_restriction,
           nan_restriction,
           'true' if _intr_has_side_effects(intr) else 'false',
           _get_c_type_tuple(intr['in']),
           _get_c_type_tuple(intr['out'])] +
          [_get_reg_operand_info(arg, 'intrinsics::bindings')
           for arg in asm['args']]))
  if for_translator:
    yield ' std::forward<Args>(args)...); result.has_value()) {'
    yield ' return *std::move(result);'
    yield ' }'
  else:
    yield ' std::forward<Args>(args)...);'
def _get_c_type_tuple(arguments):
  """Return 'std::tuple<...>' of the arguments' C types.

  Every occurrence of 'Float' in the type list is qualified with the
  'intrinsics::' namespace.
  """
  c_types = ', '.join(_get_c_type(argument) for argument in arguments)
  qualified = c_types.replace('Float', 'intrinsics::Float')
  return 'std::tuple<%s>' % qualified
def _get_asm_type(asm, prefix=''):
  """Return the comma-separated C++ parameter types of an assembler function.

  Operands whose class is an implicit register are left out.
  """
  explicit_operand_types = (
      _get_asm_operand_type(arg, prefix)
      for arg in asm['args']
      if not asm_defs.is_implicit_reg(arg['class']))
  return ', '.join(explicit_operand_types)
def _get_asm_operand_type(arg, prefix=''):
  """Return the C++ parameter type for one assembler operand.

  Args:
    arg: operand dictionary; only its 'class' entry is consulted.
    prefix: text put in front of register and operand type names (for
      example 'typename Assembler::').

  Returns:
    The prefixed register/operand type, or 'int<N>_t' for an 'Imm<N>'
    immediate ('Imm2' is passed as 'int8_t').

  Raises:
    AssertionError: if the operand class is of no known kind.
  """
  cls = arg.get('class')
  if asm_defs.is_x87reg(cls):
    return prefix + 'X87Register'
  if asm_defs.is_greg(cls):
    return prefix + 'Register'
  if asm_defs.is_xreg(cls):
    return prefix + 'XMMRegister'
  if asm_defs.is_mem_op(cls):
    return 'const ' + prefix + 'Operand&'
  if asm_defs.is_imm(cls):
    # There is no 2-bit integer type, so the smallest one is used instead.
    if cls == 'Imm2':
      return 'int8_t'
    return 'int' + cls[3:] + '_t'
  # Raised explicitly (instead of 'assert False') so that an unknown class is
  # reported even when Python runs with -O, rather than returning None.
  raise AssertionError('unsupported operand class %s' % (cls,))
def _get_asm_reference(asm, gen_builder):
  """Return the C++ expression referring to an assembler member function.

  A plain '&MacroAssembler::name' is used when possible; otherwise the
  reference is static_cast to the exact member-function type of the class
  the instruction comes from.
  """
  # Because of misfeature of Itanium C++ ABI we couldn't just use MacroAssembler
  # to static cast these references if we want to use them as template argument:
  # https://ibob.bg/blog/2018/08/18/a-bug-in-the-cpp-standard/
  # Thankfully there are usually no need to use the same trick for MacroInstructions
  # since we may always rename these, except when immediates are involved.
  # But for assembler we need to use actual type from where these
  # instructions come from!
  #
  # E.g. LZCNT have to be processed like this:
  # static_cast<void (Assembler_common_x86::*)(
  # typename Assembler_common_x86::Register,
  # typename Assembler_common_x86::Register)>(
  # &Assembler_common_x86::Lzcntl)
  if ('arch' not in asm and not gen_builder and
      not any(arg['class'].startswith('Imm') for arg in asm['args'])):
    return '&MacroAssembler::%s%s' % (
        'template ' if '<' in asm['asm'] else '',
        asm['asm'])
  if 'arch' in asm:
    assembler = 'Assembler_%s' % asm['arch']
  elif gen_builder:
    assembler = 'std::tuple_element_t<0, MacroAssembler>'
  else:
    assembler = 'MacroAssembler'
  return 'static_cast<void (%s::*)(%s)>(%s&%s::%s%s)' % (
      assembler,
      _get_asm_type(asm, 'typename %s::' % assembler),
      '\n ',
      assembler,
      'template ' if '<' in asm['asm'] else '',
      asm['asm'])
def _get_builder_reference(intr, asm):
  """Return the name of the GetOpcode class for asm (intr is not consulted)."""
  opcode_name = asm['name']
  return 'GetOpcode%s' % (opcode_name)
def _load_intrs_def_files(intrs_def_files):
  """Load and merge intrinsic definitions from several JSON files.

  Files are merged in order, so later files override earlier ones on equal
  keys. The 'License' entry, if any, is dropped from the result.
  """
  merged = {}
  for def_path in intrs_def_files:
    with open(def_path) as def_file:
      definitions = json.load(def_file)
    merged.update(definitions)
  merged.pop('License', None)
  return merged
def _load_intrs_arch_def(intrs_defs):
  """Load architecture-specific intrinsic bindings from several JSON files.

  Args:
    intrs_defs: paths of JSON files, each holding an array whose leading
      string entries (the license text) are followed by binding objects.

  Returns:
    A single list with the non-license entries of all files, in file order.
    A file that is empty or contains only strings contributes nothing.
  """
  json_data = []
  for intrs_def in intrs_defs:
    with open(intrs_def) as intrs:
      json_array = json.load(intrs)
    # Skip the leading license strings. The bound check keeps an empty or
    # strings-only array from raising IndexError.
    first_entry = 0
    while (first_entry < len(json_array) and
           isinstance(json_array[first_entry], str)):
      first_entry += 1
    json_data.extend(json_array[first_entry:])
  return json_data
def _load_macro_def(intrs, arch_intrs, insns_def):
  """Attach instructions from one asm definition file to their intrinsics.

  Every binding in arch_intrs whose 'insn' is defined in insns_def is
  registered through _add_asm_insn. Returns the architecture reported by
  the definition file and the list of bindings that found no instruction.
  """
  arch, insns = asm_defs.load_asm_defs(insns_def)
  if arch is not None:
    for insn in insns:
      insn['arch'] = arch
  insns_by_name = {insn['name']: insn for insn in insns}
  unprocessed_intrs = []
  for arch_intr in arch_intrs:
    if arch_intr['insn'] not in insns_by_name:
      unprocessed_intrs.append(arch_intr)
      continue
    _add_asm_insn(intrs, arch_intr, insns_by_name[arch_intr['insn']])
  return arch, unprocessed_intrs
def _is_interpreter_compatible_assembler(intr_asm):
  """Return False only for implementations marked 'translate-only'."""
  return intr_asm.get('usage', '') != 'translate-only'
def _is_translator_compatible_assembler(intr_asm):
  """Return False only for implementations marked 'interpret-only'."""
  return intr_asm.get('usage', '') != 'interpret-only'
def _add_asm_insn(intrs, arch_intr, insn):
  """Register macro instruction `insn` as an implementation of an intrinsic.

  The intrinsic is looked up in `intrs` under arch_intr['name'] with the
  whitespace around each comma-separated part removed. `insn` is updated in
  place: the optional 'variants', 'feature', 'nan' and 'usage' attributes are
  copied from `arch_intr`, and each referenced entry of insn['args'] gets an
  'ir_arg' (for inputs) or 'ir_res' (for outputs) position. Finally `insn` is
  appended to the intrinsic's 'asm' list, which is created on first use.

  Args:
    intrs: dict mapping intrinsic name to its description (with 'in'/'out').
    arch_intr: dict with 'name', 'in' and 'out'; 'in' and 'out' hold indexes
      into insn['args'].
    insn: dict describing the macro instruction; must have 'args'.

  Raises:
    AssertionError: if a consistency check below fails.
  """
  name = ','.join(part.strip() for part in arch_intr['name'].split(','))
  shared_keys = ('variants', 'feature', 'nan', 'usage')

  # Sanity checks: one macroinstruction may implement several different
  # intrinsics, but the number of arguments in the arch intrinsic and in the
  # arch-independent intrinsic must match.
  #
  # Note: combining intrinsics with variants and intrinsics without variants
  # is allowed (e.g. AbsF32 is combined with VectorAbsoluteFP for F32x2 and
  # F32x4), but a macroinstruction handling a different set of variants for
  # different intrinsics is not.
  for key in shared_keys:
    assert key not in insn or insn[key] == arch_intr[key]
  for direction in ('in', 'out'):
    assert len(intrs[name][direction]) == len(arch_intr[direction])

  for key in shared_keys:
    if key in arch_intr:
      insn[key] = arch_intr[key]

  # Sanity check: each argument should be bound only once - but when two
  # different intrinsics use the same macroinstruction it may already be
  # bound... and then it must be bound identically.
  for direction, tag in (('in', 'ir_arg'), ('out', 'ir_res')):
    for position, arg_index in enumerate(arch_intr[direction]):
      arg = insn['args'][arg_index]
      assert tag not in arg or arg[tag] == position
      arg[tag] = position

  # Note: one intrinsic could have more than one implementation (e.g.
  # SSE2 vs SSE4.2).
  intrs[name].setdefault('asm', []).append(insn)
def _open_asm_def_files(def_files, arch_def_files, asm_def_files, need_archs=True):
  """Load all definition files and bind intrinsics to macro instructions.

  Args:
    def_files: intrinsic definition files (see _load_intrs_def_files).
    arch_def_files: arch binding files (see _load_intrs_arch_def).
    asm_def_files: macro definition files (see _load_macro_def).
    need_archs: selects the shape of the return value.

  Returns:
    If need_archs is true, a triple (archs, intrs, expanded_intrs) where
    archs lists the non-None architectures reported for the macro definition
    files and the other two are name-sorted lists of (name, intrinsic) pairs
    before and after template expansion. Otherwise only the sorted
    unexpanded list.

  Raises:
    AssertionError: if some arch binding matched no macro definition file.
  """
  intrs = _load_intrs_def_files(def_files)
  expanded_intrs = _expand_template_intrinsics(intrs)
  pending = _load_intrs_arch_def(arch_def_files)
  archs = []
  for macro_def in asm_def_files:
    # Each file consumes the bindings it can satisfy and returns the rest.
    arch, pending = _load_macro_def(expanded_intrs, pending, macro_def)
    if arch is None:
      continue
    archs.append(arch)
  # Make sure that all intrinsics were found during processing of arch_intrs.
  assert pending == []
  sorted_intrs = sorted(intrs.items())
  if not need_archs:
    return sorted_intrs
  return archs, sorted_intrs, sorted(expanded_intrs.items())
def _expand_template_intrinsics(intrs):
  """Expand every template intrinsic into one entry per variant.

  Intrinsics whose 'class' is not 'template' are passed through as the same
  object. For a template, each variant string is split on commas; parameters
  other than 'true'/'false' that contain a letter or underscore are treated
  as types and named 'Type0', 'Type1', ... in order of appearance. A shallow
  copy of the intrinsic is made without 'variants', with those placeholder
  names in 'in' and 'out' replaced by the actual types, and stored under
  'Name<param1,param2,...>'.

  Args:
    intrs: dict mapping intrinsic name to its description.

  Returns:
    A new dict with the expanded intrinsics.
  """
  expanded = {}
  for name, intr in intrs.items():
    if intr.get('class') != 'template':
      expanded[name] = intr
      continue
    for variant in intr.get('variants'):
      params = [param.strip() for param in variant.split(',')]
      type_params = [
          param for param in params
          if param not in ('true', 'false') and re.search('[_a-zA-Z]', param)]
      types = {
          'Type%d' % index: param for index, param in enumerate(type_params)}
      new_intr = intr.copy()
      del new_intr['variants']
      for key in ('in', 'out'):
        new_intr[key] = [types.get(arg, arg) for arg in new_intr.get(key)]
      expanded['%s<%s>' % (name, ','.join(params))] = new_intr
  return expanded
def main(argv):
  """Generate intrinsics sources from the JSON definition files in argv.

  argv[1] selects the mode. It is followed by the output files (one for
  --text_asm_intrinsics_bindings, five for --public_headers), then the
  '*intrinsic_def.json' files, then the
  '*machine_ir_intrinsic_binding.json' files; all remaining arguments are
  treated as macro definition files.

  Args:
    argv: full command line, including the program name at index 0.

  Returns:
    0 on success.

  Raises:
    AssertionError: if argv[1] is not a known mode.
    IndexError: if fewer output files than the mode requires are given.
  """
  # Usage:
  #   gen_intrinsics.py --public_headers <intrinsics-inl.h>
  #                                      <intrinsics_process_bindings-inl.h>
  #                                      <interpreter_intrinsics_hooks-inl.h>
  #                                      <translator_intrinsics_hooks-inl.h>
  #                                      <mock_semantics_listener_intrinsics_hooks-inl.h>
  #                                      <riscv64_to_x86_64/intrinsic_def.json>
  #                                      ...
  #                                      <riscv64_to_x86_64/machine_ir_intrinsic_binding.json>
  #                                      ...
  #                                      <riscv64_to_x86_64/macro_def.json>
  #                                      ...
  #   gen_intrinsics.py --text_asm_intrinsics_bindings <make_intrinsics-inl.h>
  #                                      <riscv64_to_x86_64/intrinsic_def.json>
  #                                      ...
  #                                      <riscv64_to_x86_64/machine_ir_intrinsic_binding.json>
  #                                      ...
  #                                      <riscv64_to_x86_64/macro_def.json>
  #                                      ...

  def open_out_file(name):
    # Create the output directory only when the path names one. exist_ok lets
    # reruns into an existing tree succeed, while genuine failures (e.g.
    # permissions) surface here instead of being silently swallowed.
    out_dir = os.path.dirname(name)
    if out_dir:
      os.makedirs(out_dir, exist_ok=True)
    return open(name, 'w')

  mode = argv[1]
  if mode not in ('--text_asm_intrinsics_bindings', '--public_headers'):
    # Raised explicitly rather than via `assert` so that the check is not
    # stripped when Python runs with -O.
    raise AssertionError('unknown option %s' % (mode))

  out_files_end = 3 if mode == '--text_asm_intrinsics_bindings' else 7
  if len(argv) < out_files_end:
    # Fail before any input is parsed or any output is written.
    raise IndexError(
        '%s expects %d output file(s)' % (mode, out_files_end - 2))

  # The input files are grouped by suffix; the bounds checks let a group be
  # the last thing on the command line.
  def_files_end = out_files_end
  while (def_files_end < len(argv) and
         argv[def_files_end].endswith('intrinsic_def.json')):
    def_files_end += 1
  arch_def_files_end = def_files_end
  while (arch_def_files_end < len(argv) and
         argv[arch_def_files_end].endswith(
             'machine_ir_intrinsic_binding.json')):
    arch_def_files_end += 1

  archs, intrs, expanded_intrs = _open_asm_def_files(
      argv[out_files_end:def_files_end],
      argv[def_files_end:arch_def_files_end],
      argv[arch_def_files_end:],
      True)

  # Each output file is closed as soon as its generator returns, so the
  # contents are flushed deterministically.
  if mode == '--text_asm_intrinsics_bindings':
    with open_out_file(argv[2]) as out:
      _gen_make_intrinsics(out, expanded_intrs, archs)
    return 0

  with open_out_file(argv[2]) as out:
    _gen_intrinsics_inl_h(out, intrs)
  with open_out_file(argv[3]) as out:
    _gen_process_bindings(out, expanded_intrs, archs)
  _gen_semantic_player_types(intrs)
  with open_out_file(argv[4]) as out:
    _gen_interpreter_intrinsics_hooks_impl_inl_h(out, intrs)
  with open_out_file(argv[5]) as out:
    _gen_translator_intrinsics_hooks_impl_inl_h(out, intrs)
  with open_out_file(argv[6]) as out:
    _gen_mock_semantics_listener_intrinsics_hooks_impl_inl_h(out, intrs)
  return 0
if __name__ == '__main__':
  # Script entry point: main()'s return value becomes the process exit status.
  exit_status = main(sys.argv)
  sys.exit(exit_status)