Move FSgnj{,n,x} to intrinsics.h

Bug: 276787675

Test: m berberis_all
Test: berberis_host_tests/berberis_host_test

Change-Id: Ibbc2b8be19c9abd4c1ea2acd690dcfe49ef6255e
diff --git a/Android.bp b/Android.bp
index d2a498f..791c4f2 100644
--- a/Android.bp
+++ b/Android.bp
@@ -93,6 +93,7 @@
         "libberberis_assembler",
         "libberberis_base",
         "libberberis_config_globals",
+        "libberberis_intrinsics",
         "libberberis_runtime_primitives",
         "libberberis_tinyloader",
         "libgmock",
diff --git a/interpreter/riscv64/interpreter.cc b/interpreter/riscv64/interpreter.cc
index 07d3eec..72dae9c 100644
--- a/interpreter/riscv64/interpreter.cc
+++ b/interpreter/riscv64/interpreter.cc
@@ -29,6 +29,7 @@
 #include "berberis/guest_state/guest_addr.h"
 #include "berberis/guest_state/guest_state_riscv64.h"
 #include "berberis/intrinsics/guest_fp_flags.h"  // ToHostRoundingMode
+#include "berberis/intrinsics/intrinsics.h"
 #include "berberis/intrinsics/intrinsics_float.h"
 #include "berberis/intrinsics/type_traits.h"
 #include "berberis/kernel_api/run_guest_syscall.h"
@@ -674,19 +675,13 @@
 
   template <typename FloatType>
   FloatType OpFpNoRounding(Decoder::OpFpNoRoundingOpcode opcode, FloatType arg1, FloatType arg2) {
-    using Int = typename TypeTraits<FloatType>::Int;
-    using UInt = std::make_unsigned_t<Int>;
-    constexpr UInt sign_bit = std::numeric_limits<Int>::min();
-    constexpr UInt non_sign_bit = std::numeric_limits<Int>::max();
     switch (opcode) {
       case Decoder::OpFpNoRoundingOpcode::kFSgnj:
-        return bit_cast<FloatType>((bit_cast<UInt>(arg1) & non_sign_bit) |
-                                   (bit_cast<UInt>(arg2) & sign_bit));
+        return std::get<0>(FSgnj(arg1, arg2));
       case Decoder::OpFpNoRoundingOpcode::kFSgnjn:
-        return bit_cast<FloatType>((bit_cast<UInt>(arg1) & non_sign_bit) |
-                                   ((bit_cast<UInt>(arg2) & sign_bit) ^ sign_bit));
+        return std::get<0>(FSgnjn(arg1, arg2));
       case Decoder::OpFpNoRoundingOpcode::kFSgnjx:
-        return bit_cast<FloatType>(bit_cast<UInt>(arg1) ^ (bit_cast<UInt>(arg2) & sign_bit));
+        return std::get<0>(FSgnjx(arg1, arg2));
       case Decoder::OpFpNoRoundingOpcode::kFMin:
         return Min(arg1, arg2);
       case Decoder::OpFpNoRoundingOpcode::kFMax:
diff --git a/intrinsics/Android.bp b/intrinsics/Android.bp
index 6af8c65..9ad408f 100644
--- a/intrinsics/Android.bp
+++ b/intrinsics/Android.bp
@@ -17,14 +17,24 @@
     default_applicable_licenses: ["Android-Apache-2.0"],
 }
 
+python_binary_host {
+    name: "gen_intrinsics",
+    main: "gen_intrinsics.py",
+    srcs: ["gen_intrinsics.py"],
+    libs: ["asm_defs_lib"],
+}
+
+filegroup {
+    name: "libberberis_intrinsics_gen_inputs_riscv64_to_x86_64",
+    srcs: [
+        "riscv64_to_x86_64/intrinsic_def.json",
+    ],
+}
+
 cc_library_headers {
     name: "libberberis_intrinsics_riscv64_headers",
     defaults: ["berberis_defaults"],
     host_supported: true,
-    export_include_dirs: [
-        "riscv64_to_x86_64/include",
-        "riscv64/include",
-    ],
     header_libs: [
         "libberberis_base_headers",
         "libberberis_intrinsics_headers",
@@ -33,6 +43,16 @@
         "libberberis_base_headers",
         "libberberis_intrinsics_headers",
     ],
+    arch: {
+        x86_64: {
+            generated_headers: ["libberberis_intrinsics_gen_public_headers_riscv64_to_x86_64"],
+            export_generated_headers: ["libberberis_intrinsics_gen_public_headers_riscv64_to_x86_64"],
+            export_include_dirs: [
+                "riscv64_to_x86_64/include",
+                "riscv64/include",
+            ],
+        },
+    },
 }
 
 cc_library_headers {
@@ -44,15 +64,31 @@
     export_header_lib_headers: ["libberberis_base_headers"],
 }
 
+genrule {
+    name: "libberberis_intrinsics_gen_public_headers_riscv64_to_x86_64",
+    out: [
+        "berberis/intrinsics/intrinsics-inl.h",
+        "berberis/intrinsics/interpreter_intrinsics_hooks-inl.h",
+        "berberis/intrinsics/translator_intrinsics_hooks-inl.h",
+        "berberis/intrinsics/mock_semantics_listener_intrinsics_hooks-inl.h",
+    ],
+    srcs: [":libberberis_intrinsics_gen_inputs_riscv64_to_x86_64"],
+    tools: ["gen_intrinsics"],
+    cmd: "$(location gen_intrinsics) --public_headers $(out) $(in)",
+}
+
 cc_library_static {
     name: "libberberis_intrinsics",
     defaults: ["berberis_defaults"],
     host_supported: true,
-    srcs: [
-        "intrinsics_impl.cc",
-    ],
+    arch: {
+        x86_64: {
+            srcs: ["intrinsics_impl.cc"],
+        },
+    },
+    header_libs: ["libberberis_intrinsics_riscv64_headers"],
     static_libs: ["libberberis_base"],
-    export_include_dirs: ["include"],
+    export_header_lib_headers: ["libberberis_intrinsics_riscv64_headers"],
 }
 
 cc_test_library {
@@ -62,7 +98,10 @@
     srcs: ["intrinsics_float_test.cc"],
     arch: {
         x86_64: {
-            srcs: ["tuple_test.cc"],
+            srcs: [
+                "intrinsics_impl_test.cc",
+                "tuple_test.cc",
+            ],
         },
     },
     static_libs: [
diff --git a/intrinsics/gen_intrinsics.py b/intrinsics/gen_intrinsics.py
new file mode 100755
index 0000000..8e7b0d9
--- /dev/null
+++ b/intrinsics/gen_intrinsics.py
@@ -0,0 +1,849 @@
+#!/usr/bin/python3
+#
+# Copyright (C) 2018 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Generate intrinsics code."""
+
+from collections import OrderedDict
+
+import asm_defs
+import json
+import os
+import sys
+
+# C-level intrinsic calling convention:
+# 1. All arguments are passed using the natural data types:
+#  - int8_t passed as one byte argument (on the stack in IA32 mode, in GP register in x86-64 mode)
+#  - int32_t passed as 4 bytes argument (on the stack in IA32 mode, in GP register in x86-64 mode)
+#  - int64_t is passed as 8 byte argument (on the stack in IA32 mode, in GP register in x86-64 mode)
+#  - float is passed as float (on the stack in IA32 mode, in XMM register in x86-64 mode)
+#  - double is passed as double (on the stack in IA32 mode, in XMM register in x86-64 mode)
+#  - vector formats are passed as pointers to 128bit data structure
+# 2. Return values.
+#  - Values are returned as std::tuple.  This means that on IA32 it's always returned on stack.
+
+INDENT = '  '
+AUTOGEN = """\
+// This file automatically generated by gen_intrinsics.py
+// DO NOT EDIT!
+"""
+
+
+class VecFormat(object):
+
+  def __init__(self, num_elements, element_size, is_unsigned, is_float, index,
+               c_type):
+    self.num_elements = num_elements
+    self.element_size = element_size
+    self.is_unsigned = is_unsigned
+    self.is_float = is_float
+    self.index = index
+    self.c_type = c_type
+
+
+# Vector format defined as:
+#  vector_size, element_size, is_unsigned, is_float, index, ir_format, c_type
+# TODO(olonho): make flat numbering after removing legacy macro compat.
+_VECTOR_FORMATS = {
+    'U8x8': VecFormat(8, 1, True, False, 1, 'uint8_t'),
+    'U16x4': VecFormat(4, 2, True, False, 2, 'uint16_t'),
+    'U32x2': VecFormat(2, 4, True, False, 3, 'uint32_t'),
+    'U64x1': VecFormat(1, 8, True, False, 4, 'uint64_t'),
+    'U8x16': VecFormat(16, 1, True, False, 5, 'uint8_t'),
+    'U16x8': VecFormat(8, 2, True, False, 6, 'uint16_t'),
+    'U32x4': VecFormat(4, 4, True, False, 7, 'uint32_t'),
+    'U64x2': VecFormat(2, 8, True, False, 8, 'uint64_t'),
+    'I8x8': VecFormat(8, 1, False, False, 9, 'int8_t'),
+    'I16x4': VecFormat(4, 2, False, False, 10, 'int16_t'),
+    'I32x2': VecFormat(2, 4, False, False, 11, 'int32_t'),
+    'I64x1': VecFormat(1, 8, False, False, 12, 'int64_t'),
+    'I8x16': VecFormat(16, 1, False, False, 13, 'int8_t'),
+    'I16x8': VecFormat(8, 2, False, False, 14, 'int16_t'),
+    'I32x4': VecFormat(4, 4, False, False, 15, 'int32_t'),
+    'I64x2': VecFormat(2, 8, False, False, 16, 'int64_t'),
+    'U8x1': VecFormat(1, 1, True, False, 17, 'uint8_t'),
+    'I8x1': VecFormat(1, 1, False, False, 18, 'int8_t'),
+    'U16x1': VecFormat(1, 2, True, False, 19, 'uint16_t'),
+    'I16x1': VecFormat(1, 2, False, False, 20, 'int16_t'),
+    'U32x1': VecFormat(1, 4, True, False, 21, 'uint32_t'),
+    'I32x1': VecFormat(1, 4, False, False, 22, 'int32_t'),
+    # These vector formats can never intersect with above, so can reuse index.
+    'F32x1': VecFormat(1, 4, False, True, 1, 'Float32'),
+    'F32x2': VecFormat(2, 4, False, True, 2, 'Float32'),
+    'F32x4': VecFormat(4, 4, False, True, 3, 'Float32'),
+    'F64x1': VecFormat(1, 8, False, True, 4, 'Float64'),
+    'F64x2': VecFormat(2, 8, False, True, 5, 'Float64'),
+    # Those vector formats can never intersect with above, so can reuse index.
+    'U8x4': VecFormat(4, 1, True, False, 1, 'uint8_t'),
+    'U16x2': VecFormat(2, 2, True, False, 2, 'uint16_t'),
+    'I8x4': VecFormat(4, 1, False, False, 3, 'int8_t'),
+    'I16x2': VecFormat(2, 2, False, False, 4, 'int16_t'),
+}
+
+
+class VecSize(object):
+
+  def __init__(self, num_elements, index):
+    self.num_elements = num_elements
+    self.index = index
+
+
+_VECTOR_SIZES = {'X8': VecSize(8, 1), 'X16': VecSize(16, 2)}
+
+
+def _is_imm_type(arg_type):
+  return 'imm' in arg_type
+
+
+def _get_imm_c_type(arg_type):
+  return {
+      'imm8' : 'int8_t',
+      'uimm8' : 'uint8_t',
+      'uimm32' : 'uint32_t',
+  }[arg_type]
+
+
+def _get_c_type(arg_type):
+  if arg_type in ('Float32', 'Float64', 'int8_t', 'uint8_t', 'int16_t',
+                  'uint16_t', 'int32_t', 'uint32_t', 'int64_t', 'uint64_t',
+                  'volatile uint8_t*', 'volatile uint32_t*'):
+    return arg_type
+  if arg_type in ('fp_flags', 'fp_control', 'int', 'flag', 'flags', 'vec32'):
+    return 'uint32_t'
+  if _is_imm_type(arg_type):
+    return _get_imm_c_type(arg_type)
+  if arg_type == 'vec':
+    return 'SIMD128Register'
+  raise Exception('Type %s not supported' % (arg_type))
+
+
+def _get_semantic_player_type(arg_type):
+  if arg_type in ('Float32', 'Float64', 'vec'):
+    return 'SimdRegister'
+  if _is_imm_type(arg_type):
+    return _get_imm_c_type(arg_type)
+  return 'Register'
+
+
+def _gen_scalar_intr_decl(f, name, intr):
+  ins = intr.get('in')
+  outs = intr.get('out')
+  params = [_get_c_type(op) for op in ins]
+  if len(outs) > 0:
+    retval = 'std::tuple<' + ', '.join(_get_c_type(out) for out in outs) + '>'
+  else:
+    retval = 'void'
+  comment = intr.get('comment')
+  if comment:
+    print('// %s.' % (comment), file=f)
+  if intr.get('precise_nans', False):
+    print('template <bool precise_nan_operations_handling, '
+          'enum PreferCppImplementation = kUseAssemblerImplementationIfPossible>',
+          file=f)
+  print('%s %s(%s);' % (retval, name, ', '.join(params)), file=f)
+
+
+def _is_vector_class(intr):
+  return intr.get('class') in ('vector_4', 'vector_8', 'vector_16',
+                               'vector_8/16', 'vector_8/16/single',
+                               'vector_8/single', 'vector_16/single')
+
+
+def _is_simd128_conversion_required(t):
+  return (_get_semantic_player_type(t) == 'SimdRegister' and
+          _get_c_type(t) != 'SIMD128Register')
+
+
+def _get_semantics_player_hook_result(intr):
+  outs = intr['out']
+  if len(outs) == 0:
+    return 'void'
+  elif len(outs) == 1:
+    # No tuple for single result.
+    return _get_semantic_player_type(outs[0])
+  return 'std::tuple<' + ', '.join(
+      _get_semantic_player_type(out) for out in outs) + '>'
+
+
+def _get_semantics_player_hook_proto_components(name, intr):
+  ins = intr['in']
+
+  args = []
+  if _is_vector_class(intr):
+    if 'raw' in intr['variants']:
+      assert len(intr['variants']) == 1, "Unexpected length of variants"
+      args = ["uint8_t size"]
+    else:
+      args = ["uint8_t elem_size", "uint8_t elem_num"]
+      if (_is_signed(intr) and _is_unsigned(intr)):
+        args += ['bool is_signed']
+
+  args += [
+      '%s arg%d' % (_get_semantic_player_type(op), num)
+      for num, op in enumerate(ins)
+  ]
+
+  result = _get_semantics_player_hook_result(intr)
+
+  return result, name, ', '.join(args)
+
+
+def _get_semantics_player_hook_proto(name, intr):
+  result, name, args = _get_semantics_player_hook_proto_components(name, intr)
+  return '%s %s(%s)' % (result, name, args)
+
+
+def _get_interpreter_hook_call_expr(name, intr, desc=None):
+  ins = intr['in']
+  outs = intr['out']
+
+  call_params = []
+  for num, op in enumerate(ins):
+    arg = 'arg%d' % (num)
+    if _get_semantic_player_type(op) == 'SimdRegister':
+      call_params.append(_get_cast_from_simd128(arg, op, ptr_bits=64))
+    elif '*' in _get_c_type(op):
+      call_params.append('bit_cast<%s>(%s)' % (_get_c_type(op), arg))
+    else:
+      call_params.append(arg)
+
+  call_expr = 'intrinsics::%s%s(%s)' % (
+      name, _get_desc_specializations(intr, desc).replace(
+          'Float', 'intrinsics::Float'), ', '.join(call_params))
+
+  if (len(outs) == 1):
+    # Unwrap tuple for single result.
+    call_expr = 'std::get<0>(%s)' % call_expr
+    # Currently this kind of mismatch can only happen for single result, so we
+    # can keep simple code here for now.
+    if (_is_simd128_conversion_required(outs[0])):
+      out_type = _get_c_type(outs[0])
+      if out_type in ('Float32', 'Float64'):
+        call_expr = 'SimdRegister(%s)' % call_expr
+      else:
+        raise Exception('Type %s is not supported' % (out_type))
+  else:
+    if (any(_is_simd128_conversion_required(out) for out in outs)):
+      raise Exception(
+          'Unsupported SIMD128Register conversion with multiple results')
+
+  return call_expr
+
+
+def _get_interpreter_hook_return_stmt(name, intr, desc=None):
+  return 'return ' + _get_interpreter_hook_call_expr(name, intr, desc) + ';'
+
+
+def _get_semantics_player_hook_raw_vector_body(name, intr, get_return_stmt):
+  outs = intr['out']
+  if (len(outs) == 0):
+    raise Exception('No result raw vector intrinsic is not supported')
+  reg_class = intr.get('class')
+  yield 'switch (size) {'
+  for fmt, desc in _VECTOR_SIZES.items():
+    if _check_reg_class_size(reg_class, desc.num_elements):
+      yield INDENT + 'case %s:' % desc.num_elements
+      yield 2 * INDENT + get_return_stmt(name, intr, desc)
+  yield INDENT + 'default:'
+  yield 2 * INDENT + 'LOG_ALWAYS_FATAL("Unsupported size");'
+  yield 2 * INDENT + 'return {};'
+  yield '}'
+
+
+def _is_signed(intr):
+  return any(v.startswith("signed") for v in intr['variants'])
+
+
+def _is_unsigned(intr):
+  return any(v.startswith("unsigned") for v in intr['variants'])
+
+
+def _get_vector_format_init_expr(intr):
+  variants = intr.get('variants')
+
+  if ('Float32' in variants or 'Float64' in variants):
+    return 'intrinsics::GetVectorFormatFP(elem_size, elem_num)'
+
+  assert _is_signed(intr) or _is_unsigned(intr), "Unexpected intrinsic class"
+  if _is_signed(intr) and _is_unsigned(intr):
+    signed_arg = ', is_signed'
+  else:
+    signed_arg = ', true' if _is_signed(intr) else ', false'
+  return 'intrinsics::GetVectorFormatInt(elem_size, elem_num%s)' % signed_arg
+
+
+def _get_semantics_player_hook_vector_body(name, intr, get_return_stmt):
+  outs = intr['out']
+  if (len(outs) == 0):
+    raise Exception('No result vector intrinsic is not supported')
+  reg_class = intr.get('class')
+  yield 'auto format = %s;' % _get_vector_format_init_expr(intr)
+  yield 'switch (format) {'
+  for variant in intr.get('variants'):
+    for fmt, desc in _VECTOR_FORMATS.items():
+      if (_check_reg_class_size(reg_class,
+                                desc.element_size * desc.num_elements) and
+          _check_typed_variant(variant, desc)):
+        yield INDENT + 'case intrinsics::kVector%s:' % fmt
+        yield 2 * INDENT + get_return_stmt(name, intr, desc)
+      elif (reg_class in ('vector_8/single', 'vector_8/16/single', 'vector_16/single') and
+            desc.num_elements == 1 and
+          _check_typed_variant(variant, desc)):
+        assert desc.element_size <= 8, "Unexpected element size"
+        yield INDENT + 'case intrinsics::kVector%s:' % fmt
+        yield 2 * INDENT + get_return_stmt(name, intr, desc)
+  yield INDENT + 'default:'
+  yield 2 * INDENT + 'LOG_ALWAYS_FATAL("Unsupported format");'
+  yield 2 * INDENT + 'return {};'
+  yield '}'
+
+
+# Syntax sugar heavily used in tests.
+def _get_interpreter_hook_vector_body(name, intr):
+  return _get_semantics_player_hook_vector_body(
+      name, intr, _get_interpreter_hook_return_stmt)
+
+
+def _gen_interpreter_hook(f, name, intr):
+  print('%s {' % (_get_semantics_player_hook_proto(name, intr)), file=f)
+
+  if _is_vector_class(intr):
+    if 'raw' in intr['variants']:
+      assert len(intr['variants']) == 1, "Unexpected length of variants"
+      lines = _get_semantics_player_hook_raw_vector_body(
+          name,
+          intr,
+          _get_interpreter_hook_return_stmt)
+    else:
+      lines = _get_interpreter_hook_vector_body(name, intr)
+
+    lines = [INDENT + l for l in lines]
+    print('\n'.join(lines), file=f)
+  else:
+    print(INDENT + _get_interpreter_hook_return_stmt(name, intr), file=f)
+
+  print('}\n', file=f)
+
+
+def _get_translator_hook_call_expr(name, intr, desc = None):
+  desc_spec = _get_desc_specializations(intr, desc).replace(
+      'Float', 'intrinsics::Float')
+  args = [('arg%d' % n) for n, _ in enumerate(intr['in'])]
+  template_params = ['&intrinsics::' + name + desc_spec]
+  template_params += [_get_semantics_player_hook_result(intr)]
+  return 'CallIntrinsic<%s>(%s)' % (', '.join(template_params), ', '.join(args))
+
+
+def _get_translator_hook_return_stmt(name, intr, desc=None):
+  return 'return ' + _get_translator_hook_call_expr(name, intr, desc) + ';'
+
+
+def _gen_translator_hook(f, name, intr):
+  print('%s {' % (_get_semantics_player_hook_proto(name, intr)), file=f)
+
+  if _is_vector_class(intr):
+    if 'raw' in intr['variants']:
+      assert len(intr['variants']) == 1, "Unexpected length of variants"
+      lines = _get_semantics_player_hook_raw_vector_body(
+          name,
+          intr,
+          _get_translator_hook_return_stmt)
+    else:
+      lines = _get_semantics_player_hook_vector_body(
+          name,
+          intr,
+          _get_translator_hook_return_stmt)
+    lines = [INDENT + l for l in lines]
+    print('\n'.join(lines), file=f)
+  else:
+    print(INDENT + _get_translator_hook_return_stmt(name, intr), file=f)
+
+  print('}\n', file=f)
+
+
+def _gen_mock_semantics_listener_hook(f, name, intr):
+  result, name, args = _get_semantics_player_hook_proto_components(name, intr)
+  print('MOCK_METHOD((%s), %s, (%s));' % (result, name, args), file=f)
+
+
+def _check_signed_variant(variant, desc):
+  if variant == 'signed':
+    return True
+  if variant == 'signed_32':
+    return desc.element_size == 4
+  if variant == 'signed_64':
+    return desc.element_size == 8
+  if variant == 'signed_16/32':
+    return desc.element_size in (2, 4)
+  if variant == 'signed_8/16/32':
+    return desc.element_size in (1, 2, 4)
+  if variant == 'signed_16/32/64':
+    return desc.element_size in (2, 4, 8)
+  if variant == 'signed_8/16/32/64':
+    return desc.element_size in (1, 2, 4, 8)
+  if variant == 'signed_32/64':
+    return desc.element_size in (4, 8)
+  return False
+
+
+def _check_unsigned_variant(variant, desc):
+  if variant == 'unsigned':
+    return True
+  if variant == 'unsigned_8':
+    return desc.element_size == 1
+  if variant == 'unsigned_16':
+    return desc.element_size == 2
+  if variant == 'unsigned_32':
+    return desc.element_size == 4
+  if variant == 'unsigned_64':
+    return desc.element_size == 8
+  if variant == 'unsigned_8/16':
+    return desc.element_size in (1, 2)
+  if variant == 'unsigned_8/16/32':
+    return desc.element_size in (1, 2, 4)
+  if variant == 'unsigned_16/32/64':
+    return desc.element_size in (2, 4, 8)
+  if variant == 'unsigned_8/16/32/64':
+    return desc.element_size in (1, 2, 4, 8)
+  if variant == 'unsigned_32/64':
+    return desc.element_size in (4, 8)
+  return False
+
+
+def _check_reg_class_size(reg_class, size):
+  # Small vectors are separate namespace.
+  if size == 4 and reg_class == 'vector_4':
+    return True
+  if size == 8 and reg_class in ('vector_8', 'vector_8/16', 'vector_8/16/single',
+                                 'vector_8/single'):
+    return True
+  if size == 16 and reg_class in ('vector_16', 'vector_8/16', 'vector_8/16/single',
+                                  'vector_16/single'):
+    return True
+  return False
+
+
+def _check_typed_variant(variant, desc):
+  if desc.is_unsigned and not desc.is_float:
+    return _check_unsigned_variant(variant, desc)
+  if not desc.is_unsigned and not desc.is_float:
+    return _check_signed_variant(variant, desc)
+  if desc.is_float:
+    if desc.element_size == 4:
+      return variant == 'Float32'
+    if desc.element_size == 8:
+      return variant == 'Float64'
+  return False
+
+
+def _get_formats_with_descriptions(intr):
+  reg_class = intr.get('class')
+  for variant in intr.get('variants'):
+    found_fmt = False
+    for fmt, desc in _VECTOR_FORMATS.items():
+      if (_check_reg_class_size(reg_class,
+                                desc.element_size * desc.num_elements) and
+          _check_typed_variant(variant, desc) and
+          (reg_class != 'vector_4' or desc.element_size < 4)):
+        found_fmt = True
+        yield fmt, desc
+
+    if variant == 'raw':
+      for fmt, desc in _VECTOR_SIZES.items():
+        if _check_reg_class_size(reg_class, desc.num_elements):
+          found_fmt = True
+          yield fmt, desc
+
+    assert found_fmt, 'Couldn\'t expand %s' % reg_class
+
+
+def _get_result_type(outs):
+  result_type = 'void'
+  return_stmt = ''
+  if len(outs) >= 1:
+    result_type = ('std::tuple<' +
+                   ', '.join(_get_c_type(out) for out in outs) + '>')
+    return_stmt = 'return '
+  return result_type, return_stmt
+
+
+def _get_in_params(params):
+  for param_index, param in enumerate(params):
+    yield _get_c_type(param), 'in%d' % (param_index)
+
+
+def _get_out_params(params):
+  for param_index, param in enumerate(params):
+    yield _get_c_type(param), 'out%d' % (param_index)
+
+
+def _get_cast_from_simd128(var, target_type, ptr_bits):
+  if ('*' in target_type):
+    return 'bit_cast<%s>(%s.Get<uint%d_t>(0))' % (_get_c_type(target_type), var,
+                                                  ptr_bits)
+
+  cast_map = {
+      'Float32': '.Get<intrinsics::Float32>(0)',
+      'Float64': '.Get<intrinsics::Float64>(0)',
+      'int8_t': '.Get<int8_t>(0)',
+      'uint8_t': '.Get<uint8_t>(0)',
+      'int16_t': '.Get<int16_t>(0)',
+      'uint16_t': '.Get<uint16_t>(0)',
+      'int32_t': '.Get<int32_t>(0)',
+      'uint32_t': '.Get<uint32_t>(0)',
+      'int64_t': '.Get<int64_t>(0)',
+      'uint64_t': '.Get<uint64_t>(0)',
+      'SIMD128Register': ''
+  }
+  return '%s%s' % (var, cast_map[_get_c_type(target_type)])
+
+
+def _get_desc_specializations(intr, desc=None):
+  if hasattr(desc, 'c_type'):
+    spec = [desc.c_type, str(desc.num_elements)]
+  elif hasattr(desc, 'num_elements'):
+    spec = [str(desc.num_elements)]
+  else:
+    spec = []
+  if intr.get('precise_nans', False):
+    spec = ['Config::kPreciseNaNOperationsHandling'] + spec
+  if not len(spec):
+    return ''
+  return '<%s>' % ', '.join(spec)
+
+
+def _intr_has_side_effects(intr, fmt=None):
+  # If we have 'has_side_effects' mark in JSON file then we use it "as is".
+  if 'has_side_effects' in intr:
+    return intr.get('has_side_effects')
+  # Otherwise we mark all floating-point related intrinsics as "volatile".
+  # TODO(b/68857496): move that information in HIR/LIR and stop doing that.
+  if 'Float32' in intr.get('in') or 'Float64' in intr.get('in'):
+    return True
+  if 'Float32' in intr.get('out') or 'Float64' in intr.get('out'):
+    return True
+  if fmt is not None and fmt.startswith('F'):
+    return True
+  return False
+
+
+def _gen_intrinsics_inl_h(f, intrs):
+  print(AUTOGEN, file=f)
+  for name, intr in intrs:
+    if intr.get('class') == 'scalar':
+      _gen_scalar_intr_decl(f, name, intr)
+
+
+def _gen_interpreter_intrinsics_hooks_impl_inl_h(f, intrs):
+  print(AUTOGEN, file=f)
+  for name, intr in intrs:
+    _gen_interpreter_hook(f, name, intr)
+
+
+def _gen_translator_intrinsics_hooks_impl_inl_h(f, intrs):
+  print(AUTOGEN, file=f)
+  for name, intr in intrs:
+    _gen_translator_hook(f, name, intr)
+
+
+def _gen_mock_semantics_listener_intrinsics_hooks_impl_inl_h(f, intrs):
+  print(AUTOGEN, file=f)
+  for name, intr in intrs:
+    _gen_mock_semantics_listener_hook(f, name, intr)
+
+
+def _get_reg_operand_info(arg, info_prefix=None):
+  need_tmp = arg['class'] in ('EAX', 'EDX', 'CL', 'ECX')
+  if info_prefix is None:
+    class_info = 'void'
+  else:
+    class_info = '%s::%s' % (info_prefix, arg['class'])
+  if arg['class'] == 'Imm8':
+    return 'ImmArg<%d, int8_t, %s>' % (arg['ir_arg'], class_info)
+  if info_prefix is None:
+    using_info = 'void'
+  else:
+    using_info = '%s::%s' % (info_prefix, {
+        'def': 'Def',
+        'def_early_clobber': 'DefEarlyClobber',
+        'use': 'Use',
+        'use_def': 'UseDef'
+    }[arg['usage']])
+  if arg['usage'] == 'use':
+    if need_tmp:
+      return 'InTmpArg<%d, %s, %s>' % (arg['ir_arg'], class_info, using_info)
+    return 'InArg<%d, %s, %s>' % (arg['ir_arg'], class_info, using_info)
+  if arg['usage'] in ('def', 'def_early_clobber'):
+    assert 'ir_arg' not in arg
+    if 'ir_res' in arg:
+      if need_tmp:
+        return 'OutTmpArg<%d, %s, %s>' % (arg['ir_res'], class_info, using_info)
+      return 'OutArg<%d, %s, %s>' % (arg['ir_res'], class_info, using_info)
+    return 'TmpArg<%s, %s>' % (class_info, using_info)
+  if arg['usage'] == 'use_def':
+    if 'ir_res' in arg:
+      if need_tmp:
+        return 'InOutTmpArg<%s, %s, %s, %s>' % (arg['ir_arg'], arg['ir_res'],
+                                                class_info, using_info)
+      return 'InOutArg<%s, %s, %s, %s>' % (arg['ir_arg'], arg['ir_res'],
+                                           class_info, using_info)
+    return 'InTmpArg<%s, %s, %s>' % (arg['ir_arg'], class_info, using_info)
+  assert False, 'unknown operand usage %s' % (arg['usage'])
+
+
+def _gen_make_intrinsics(f, intrs):
+  print("""%s
+void MakeIntrinsics(FILE* out) {
+  using MacroAssembler = MacroAssembler<TextAssembler>;
+  namespace OperandClass = x86::OperandClass;
+  std::unique_ptr<GenerateAsmCallBase> asm_call_generators[] = {""" % AUTOGEN, file=f)
+  for line in _gen_c_intrinsics_generator(intrs):
+    print(line, file=f)
+  print("""  };
+  GenerateAsmCalls(out, std::forward<decltype(asm_call_generators)>(asm_call_generators));
+}""", file=f)
+
+
+def _gen_c_intrinsics_generator(intrs):
+  for name, intr in intrs:
+    ins = intr.get('in')
+    outs = intr.get('out')
+    params = _get_in_params(ins)
+    formal_args = ', '.join('%s %s' % (type, param) for type, param in params)
+    result_type, _ = _get_result_type(outs)
+    if 'asm' not in intr:
+      continue
+    if 'variants' in intr:
+      variants = _get_formats_with_descriptions(intr)
+      # Sort by index, to keep order close to what _gen_intrs_enum produces.
+      variants = sorted(variants, key=lambda variant: variant[1].index)
+      # Collect intr_asms for all versions of intrinsic.
+      # Note: not all variants are guaranteed to have asm version!
+      # If that happens list of intr_asms for that variant would be empty.
+      variants = [(desc, [
+          intr_asm for intr_asm in intr['asm'] if fmt in intr_asm['variants']
+      ]) for fmt, desc in variants]
+      # Print intrinsic generator
+      for desc, intr_asms in variants:
+        if len(intr_asms) > 0:
+          if 'raw' in intr['variants']:
+            spec = '%d' % (desc.num_elements)
+          else:
+            spec = '%s, %d' % (desc.c_type, desc.num_elements)
+          for intr_asm in intr_asms:
+            for line in _gen_c_intrinsic('%s<%s>' % (name, spec), intr, intr_asm):
+              yield line
+    else:
+      for intr_asm in intr['asm']:
+        for line in _gen_c_intrinsic(name, intr, intr_asm):
+          yield line
+
+
+MAX_GENERATED_LINE_LENGTH = 100
+
+
+def _gen_c_intrinsic(name, intr, asm):
+  if not _is_interpreter_compatible_assembler(asm):
+    return
+
+  sse_restriction = 'GenerateAsmCallBase::kNoSSERestriction'
+  if 'feature' in asm:
+    if asm['feature'] == 'AuthenticAMD':
+      sse_restriction = 'GenerateAsmCallBase::kIsAuthenticAMD'
+    else:
+      sse_restriction = 'GenerateAsmCallBase::kHas%s' % asm['feature']
+
+  nan_restriction = 'GenerateAsmCallBase::kNoNansOperation'
+  if 'nan' in asm:
+    nan_restriction = 'GenerateAsmCallBase::k%sNanOperationsHandling' % asm['nan']
+
+  restriction = [sse_restriction, nan_restriction]
+
+  yield '      std::unique_ptr<GenerateAsmCallBase>('
+
+  def get_c_type_tuple(arguments):
+    return 'std::tuple<%s>' % ', '.join(
+        _get_c_type(argument) for argument in arguments).replace(
+            'Float', 'intrinsics::Float')
+
+  yield '          new GenerateAsmCall<%s>(' % (
+    ',\n                              '.join(
+        ['true' if _intr_has_side_effects(intr) else 'false'] +
+        [get_c_type_tuple(intr['in'])] + [get_c_type_tuple(intr['out'])] +
+        [_get_reg_operand_info(arg, 'OperandClass')
+         for arg in asm['args']]))
+
+  one_line = '              out, &MacroAssembler::%s, %s)),' % (
+      asm['asm'], ', '.join(['"%s"' % name] + restriction))
+  if len(one_line) <= MAX_GENERATED_LINE_LENGTH:
+    yield one_line
+    return
+
+  yield '              out,'
+  yield '              &MacroAssembler::%s,' % asm['asm']
+  values = ['"%s"' % name] + restriction
+  for index, value in enumerate(values):
+    if index + 1 == len(values):
+      yield '              %s)),' % value
+    else:
+      yield '              %s,' % value
+
+
+def _load_intrs_def_files(intrs_def_files):
+  result = {}
+  for intrs_def in intrs_def_files:
+    with open(intrs_def) as intrs:
+      result.update(json.load(intrs))
+  return result
+
+
+def _load_intrs_arch_def(intrs_defs):
+  json_data = []
+  for intrs_def in intrs_defs:
+    with open(intrs_def) as intrs:
+      json_data.extend(json.load(intrs))
+  return json_data
+
+
+def _load_macro_def(intrs, arch_intrs, insns_def):
+  _, insns = asm_defs.load_asm_defs(insns_def)
+  insns_map = dict((insn['name'], insn) for insn in insns)
+  unprocessed_intrs = []
+  for arch_intr in arch_intrs:
+    if arch_intr['insn'] in insns_map:
+      insn = insns_map[arch_intr['insn']]
+      _add_asm_insn(intrs, arch_intr, insn)
+    else:
+      unprocessed_intrs.append(arch_intr)
+  return unprocessed_intrs
+
+
+def _is_interpreter_compatible_assembler(intr_asm):
+  if intr_asm.get('usage', '') == 'translate-only':
+    return False
+  return True
+
+
+def _add_asm_insn(intrs, arch_intr, insn):
+  name = arch_intr['name']
+  # Sanity checks: a MacroInstruction could implement a few different intrinsics, but
+  # number of arguments in arch intrinsic and arch-independent intrinsic
+  # should match.
+  #
+  # Note: we allow combining intrinsics with variants and intrinsics without
+  # variants (e.g. AbsF32 is combined with VectorAbsoluteFP for F32x2 and F32x4),
+  # but don't allow macroinstructions which would handle different set of
+  # variants for different intrinsics.
+
+  assert 'variants' not in insn or insn['variants'] == arch_intr['variants']
+  assert 'feature' not in insn or insn['feature'] == arch_intr['feature']
+  assert 'nan' not in insn or insn['nan'] == arch_intr['nan']
+  assert 'usage' not in insn or insn['usage'] == arch_intr['usage']
+  assert len(intrs[name]['in']) == len(arch_intr['in'])
+  assert len(intrs[name]['out']) == len(arch_intr['out'])
+
+  if 'variants' in arch_intr:
+    insn['variants'] = arch_intr['variants']
+  if 'feature' in arch_intr:
+    insn['feature'] = arch_intr['feature']
+  if 'nan' in arch_intr:
+    insn['nan'] = arch_intr['nan']
+  if 'usage' in arch_intr:
+    insn['usage'] = arch_intr['usage']
+
+  for count, in_arg in enumerate(arch_intr['in']):
+    # Sanity check: each in argument should only be used once - but if two
+    # different intrinsics use them same macroinstruction it could be already
+    # defined... yet it must be defined identically.
+    assert ('ir_arg' not in insn['args'][in_arg] or
+            insn['args'][in_arg]['ir_arg'] == count)
+    insn['args'][in_arg]['ir_arg'] = count
+
+  for count, out_arg in enumerate(arch_intr['out']):
+    # Sanity check: each out argument should only be used once, too.
+    assert ('ir_res' not in insn['args'][out_arg] or
+            insn['args'][out_arg]['ir_res'] == count)
+    insn['args'][out_arg]['ir_res'] = count
+
+  # Note: one intrinsic could have more than one implementation (e.g.
+  # SSE2 vs SSE4.2).
+  if 'asm' not in intrs[name]:
+    intrs[name]['asm'] = []
+  intrs[name]['asm'].append(insn)
+
+
def _open_asm_def_files(def_files, arch_def_files, asm_def_files):
  """Load intrinsic defs, then bind assembler templates from each macro file.

  Returns the intrinsics as a name-sorted list of (name, definition) pairs.
  """
  intrs = _load_intrs_def_files(def_files)
  pending = _load_intrs_arch_def(arch_def_files)
  for macro_def in asm_def_files:
    pending = _load_macro_def(intrs, pending, macro_def)
  # Every arch intrinsic must have been matched to some macroinstruction.
  assert not pending
  return sorted(intrs.items())
+
+
def main(argv):
  """Command-line entry point; returns the process exit status.

  Usage:
    gen_intrinsics.py --public_headers <intrinsics-inl.h>
                                       <interpreter_intrinsics_hooks-inl.h>
                                       <translator_intrinsics_hooks-inl.h>
                                       <mock_semantics_listener_intrinsics_hooks-inl.h>
                                       <riscv64_to_x86_64/intrinsic_def.json>
                                       ...
    gen_intrinsics.py --make_intrinsics_cc <make_intrinsics.cc>
                                           <riscv64_to_x86_64/intrinsic_def.json>
                                           ...
                                           <riscv64_to_x86_64/machine_ir_intrinsic_binding.json>
                                           ...
                                           <riscv64_to_x86_64/macro_def.json>
                                           ...
  """

  def open_out_file(name):
    # Create the parent directory if needed.  Catch OSError only (directory
    # already exists, empty dirname, ...) instead of the previous bare
    # 'except:', which also swallowed KeyboardInterrupt and SystemExit.
    try:
      os.makedirs(os.path.dirname(name))
    except OSError:
      pass
    return open(name, 'w')

  mode = argv[1]
  if mode == '--public_headers':
    # argv[2:6] are the four generated headers; the rest are JSON def files.
    intrs = sorted(_load_intrs_def_files(argv[6:]).items())
    _gen_intrinsics_inl_h(open_out_file(argv[2]), intrs)
    _gen_interpreter_intrinsics_hooks_impl_inl_h(open_out_file(argv[3]), intrs)
    _gen_translator_intrinsics_hooks_impl_inl_h(
        open_out_file(argv[4]), intrs)
    _gen_mock_semantics_listener_intrinsics_hooks_impl_inl_h(
        open_out_file(argv[5]), intrs)
  elif mode == '--make_intrinsics_cc':
    # Positional JSON inputs are grouped by filename suffix: intrinsic
    # definitions, then machine IR bindings, then macro definitions.
    def_files_end = 3
    while argv[def_files_end].endswith('intrinsic_def.json'):
      def_files_end += 1
    arch_def_files_end = def_files_end
    while argv[arch_def_files_end].endswith('machine_ir_intrinsic_binding.json'):
      arch_def_files_end += 1
    intrs = _open_asm_def_files(
      argv[3:def_files_end],
      argv[def_files_end:arch_def_files_end],
      argv[arch_def_files_end:])
    _gen_make_intrinsics(open_out_file(argv[2]), intrs)
  else:
    assert False, 'unknown option %s' % (mode)

  return 0
+
+
# Allow running as a standalone script.
if __name__ == '__main__':
  sys.exit(main(sys.argv))
diff --git a/intrinsics/include/berberis/intrinsics/common/intrinsics.h b/intrinsics/include/berberis/intrinsics/common/intrinsics.h
new file mode 100644
index 0000000..67248ed
--- /dev/null
+++ b/intrinsics/include/berberis/intrinsics/common/intrinsics.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_COMMON_INTRINSICS_H_
+#define BERBERIS_INTRINSICS_COMMON_INTRINSICS_H_
+
+#include <limits>
+#include <tuple>
+#include <type_traits>
+
+#include "berberis/base/bit_util.h"
+#include "berberis/intrinsics/intrinsics_float.h"  // Float32/Float64/ProcessNans
+#include "berberis/intrinsics/type_traits.h"
+
+namespace berberis {
+
+class SIMD128Register;
+
+namespace intrinsics {
+
+#include "berberis/intrinsics/intrinsics-inl.h"  // NOLINT: generated file!
+
+template <typename FloatType>
+std::tuple<FloatType> FSgnj(FloatType x, FloatType y) {
+  using Int = typename TypeTraits<FloatType>::Int;
+  using UInt = std::make_unsigned_t<Int>;
+  constexpr UInt sign_bit = std::numeric_limits<Int>::min();
+  constexpr UInt non_sign_bit = std::numeric_limits<Int>::max();
+  return {bit_cast<FloatType>((bit_cast<UInt>(x) & non_sign_bit) | (bit_cast<UInt>(y) & sign_bit))};
+}
+
// FSgnjn: combine the magnitude of x with the *negated* sign of y (RISC-V
// FSGNJN).  Implemented by flipping y's sign and reusing FSgnj.
// NOTE(review): assumes Negative only flips the sign bit (true for IEEE
// negation, including NaNs) — confirm against intrinsics_float.h.
template <typename FloatType>
std::tuple<FloatType> FSgnjn(FloatType x, FloatType y) {
  return FSgnj(x, Negative(y));
}
+
+template <typename FloatType>
+std::tuple<FloatType> FSgnjx(FloatType x, FloatType y) {
+  using Int = typename TypeTraits<FloatType>::Int;
+  using UInt = std::make_unsigned_t<Int>;
+  constexpr UInt sign_bit = std::numeric_limits<Int>::min();
+  return {bit_cast<FloatType>(bit_cast<UInt>(x) ^ (bit_cast<UInt>(y) & sign_bit))};
+}
+
+}  // namespace intrinsics
+
+}  // namespace berberis
+
+#endif  // BERBERIS_INTRINSICS_COMMON_INTRINSICS_H_
diff --git a/intrinsics/include/berberis/intrinsics/simd_register.h b/intrinsics/include/berberis/intrinsics/simd_register.h
new file mode 100644
index 0000000..5cb23d3
--- /dev/null
+++ b/intrinsics/include/berberis/intrinsics/simd_register.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BERBERIS_INTRINSICS_SIMD_REGISTER_H_
+#define BERBERIS_INTRINSICS_SIMD_REGISTER_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "berberis/intrinsics/intrinsics_float.h"
+
+namespace berberis {
+
+class SIMD128Register;
+
/*
 * We want to use partial specialization for SIMD128Register::[GS]et, but
 * it's not allowed for class members.  Use helper functions instead.
 */
+template <typename T>
+T SIMD128RegisterGet(const SIMD128Register* reg, int index);
+template <typename T>
+T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index);
+
// Value type holding one 128-bit SIMD register.  Lanes of any supported
// element type are read/written via Get<T>(index) / Set<T>(elem, index);
// 16-byte-wide types use the index-less overloads to access the whole
// register.  Element access is delegated to the SIMD128RegisterGet/Set
// friend templates specialized below the class.
// NOTE(review): std::enable_if_t requires <type_traits>; it is not included
// directly here — confirm it arrives transitively (e.g. via
// intrinsics_float.h).
class SIMD128Register {
 public:
  // Construct from a single element stored in lane 0; the remaining lanes
  // are left uninitialized.
  template <typename T>
  explicit SIMD128Register(T elem) {
    Set<T>(elem, 0);
  }
  SIMD128Register() = default;
  SIMD128Register(const SIMD128Register&) = default;
  SIMD128Register(SIMD128Register&&) = default;
  SIMD128Register& operator=(const SIMD128Register&) = default;
  SIMD128Register& operator=(SIMD128Register&&) = default;

  // Read lane `index`, viewing the register as an array of T.
  template <typename T>
  T Get(int index) const {
    return SIMD128RegisterGet<T>(this, index);
  }
  // Write lane `index`; returns the stored element.
  template <typename T>
  T Set(T elem, int index) {
    return SIMD128RegisterSet<T>(this, elem, index);
  }
  // Whole-register read, only for 16-byte types (e.g. __uint128_t).
  template <typename T>
  auto Get() const -> std::enable_if_t<sizeof(T) == 16, T> {
    return SIMD128RegisterGet<T>(this, 0);
  }
  // Whole-register write, only for 16-byte types.
  template <typename T>
  auto Set(T elem) -> std::enable_if_t<sizeof(T) == 16, T> {
    return SIMD128RegisterSet<T>(this, elem, 0);
  }

 private:
  // All views share the same 16 bytes of storage.
  union {
#ifdef __GNUC__
    // Note: we are violating strict aliasing rules in the code below (Get and Set function) thus we
    // need to mask these fields "may_alias". Unknown attributes could be silently ignored by the
    // compiler. We protect definitions with #ifdef __GNUC__ to make sure may_alias is not ignored.
    [[gnu::vector_size(16), gnu::may_alias]] int8_t int8;
    [[gnu::vector_size(16), gnu::may_alias]] uint8_t uint8;
    [[gnu::vector_size(16), gnu::may_alias]] int16_t int16;
    [[gnu::vector_size(16), gnu::may_alias]] uint16_t uint16;
    [[gnu::vector_size(16), gnu::may_alias]] int32_t int32;
    [[gnu::vector_size(16), gnu::may_alias]] uint32_t uint32;
    [[gnu::vector_size(16), gnu::may_alias]] int64_t int64;
    [[gnu::vector_size(16), gnu::may_alias]] uint64_t uint64;
#if defined(__x86_64)
    [[gnu::vector_size(16), gnu::may_alias]] __uint128_t uint128;
#endif
    // Note: we couldn't use Float32/Float64 here because [[gnu::vector]] only works with
    // raw integer or FP-types.
    [[gnu::vector_size(16), gnu::may_alias]] float float32;
    [[gnu::vector_size(16), gnu::may_alias]] double float64;
#else
#error Unsupported compiler.
#endif
  };
  template <typename T>
  friend T SIMD128RegisterGet(const SIMD128Register* reg, int index);
  template <typename T>
  friend T SIMD128RegisterSet(SIMD128Register* reg, T elem, int index);
};
+
// The guest register file stores SIMD values as raw 16-byte slots, so the
// host-side representation must match in both size and alignment.
static_assert(sizeof(SIMD128Register) == 16, "Unexpected size of SIMD128Register");

#if defined(__i386__)
static_assert(alignof(SIMD128Register) == 16, "Unexpected align of SIMD128Register");
#elif defined(__x86_64)
static_assert(alignof(SIMD128Register) == 16, "Unexpected align of SIMD128Register");
#else
#error Unsupported architecture
#endif
+
+/*
+ * Partial specializations of SIMD128Register getters/setters for most types
+ *
+ * GNU C makes it possible to use unions to quickly and efficiently
+ * operate with subvalues of different types:
+ *   http://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html#Type-punning
+ * Unfortunately it's not a valid ANSI C code thus we always do that via
+ * Get<type>(index) and Set<type>(value, index) accessors.
+ *
+ * For other compilers one will need to use memcpy to guarantee safety.
+ */
+#ifdef __GNUC__
+#define SIMD_128_REGISTER_GETTER_SETTER(TYPE, MEMBER)                                 \
+  template <>                                                                         \
+  inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) {       \
+    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
+    return reg->MEMBER[index];                                                        \
+  }                                                                                   \
+  template <>                                                                         \
+  inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
+    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
+    return reg->MEMBER[index] = elem;                                                 \
+  }
+#define SIMD_128_REGISTER_GETTER_SETTЕR(TYPE, MEMBER_TYPE, MEMBER)                    \
+  template <>                                                                         \
+  inline TYPE SIMD128RegisterGet<TYPE>(const SIMD128Register* reg, int index) {       \
+    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
+    static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE));                               \
+    /* Don't use bit_cast because it's unsafe if -O0 is used. */                      \
+    /* See intrinsics_float.h for explanation. */                                     \
+    TYPE elem;                                                                        \
+    MEMBER_TYPE melem;                                                                \
+    melem = reg->MEMBER[index];                                                       \
+    memcpy(&elem, &melem, sizeof(TYPE));                                              \
+    return elem;                                                                      \
+  }                                                                                   \
+  template <>                                                                         \
+  inline TYPE SIMD128RegisterSet<TYPE>(SIMD128Register * reg, TYPE elem, int index) { \
+    CHECK_LT(unsigned(index), sizeof(*reg) / sizeof(TYPE));                           \
+    static_assert(sizeof(TYPE) == sizeof(MEMBER_TYPE));                               \
+    /* Don't use bit_cast because it's unsafe if -O0 is used. */                      \
+    /* See intrinsics_float.h for explanation. */                                     \
+    MEMBER_TYPE melem;                                                                \
+    memcpy(&melem, &elem, sizeof(TYPE));                                              \
+    reg->MEMBER[index] = melem;                                                       \
+    return elem;                                                                      \
+  }
+#endif
+SIMD_128_REGISTER_GETTER_SETTER(int8_t, int8);
+SIMD_128_REGISTER_GETTER_SETTER(uint8_t, uint8);
+SIMD_128_REGISTER_GETTER_SETTER(int16_t, int16);
+SIMD_128_REGISTER_GETTER_SETTER(uint16_t, uint16);
+SIMD_128_REGISTER_GETTER_SETTER(int32_t, int32);
+SIMD_128_REGISTER_GETTER_SETTER(uint32_t, uint32);
+SIMD_128_REGISTER_GETTER_SETTER(int64_t, int64);
+SIMD_128_REGISTER_GETTER_SETTER(uint64_t, uint64);
+#if defined(__x86_64)
+SIMD_128_REGISTER_GETTER_SETTER(__uint128_t, uint128);
+#endif
+SIMD_128_REGISTER_GETTER_SETTЕR(intrinsics::Float32, float, float32);
+SIMD_128_REGISTER_GETTER_SETTЕR(intrinsics::Float64, double, float64);
+#undef SIMD_128_REGISTER_GETTER_SETTER
+
+}  // namespace berberis
+
+#endif  // BERBERIS_INTRINSICS_SIMD_REGISTER_H_
diff --git a/intrinsics/intrinsics_impl.cc b/intrinsics/intrinsics_impl.cc
index 3cec356..fa6756f 100644
--- a/intrinsics/intrinsics_impl.cc
+++ b/intrinsics/intrinsics_impl.cc
@@ -13,3 +13,33 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+#include "berberis/intrinsics/intrinsics.h"
+
namespace berberis::intrinsics {

// Out-of-line entry points for the FSgnj{,n,x} intrinsics.  The S/D suffix
// selects single/double precision (matching RISC-V FSGNJ.S/FSGNJ.D etc.);
// each simply forwards to the generic template from intrinsics.h.

std::tuple<Float32> FSgnjS(Float32 x, Float32 y) {
  return FSgnj(x, y);
}

std::tuple<Float64> FSgnjD(Float64 x, Float64 y) {
  return FSgnj(x, y);
}

std::tuple<Float32> FSgnjnS(Float32 x, Float32 y) {
  return FSgnjn(x, y);
}

std::tuple<Float64> FSgnjnD(Float64 x, Float64 y) {
  return FSgnjn(x, y);
}

std::tuple<Float32> FSgnjxS(Float32 x, Float32 y) {
  return FSgnjx(x, y);
}

std::tuple<Float64> FSgnjxD(Float64 x, Float64 y) {
  return FSgnjx(x, y);
}

}  // namespace berberis::intrinsics
diff --git a/intrinsics/intrinsics_impl_test.cc b/intrinsics/intrinsics_impl_test.cc
new file mode 100644
index 0000000..8388aae
--- /dev/null
+++ b/intrinsics/intrinsics_impl_test.cc
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include "berberis/intrinsics/intrinsics.h"
+
+namespace berberis::intrinsics {
+
+namespace {
+
+TEST(Intrinsics, FSgnjS) {
+  EXPECT_EQ(std::tuple{Float32{1.0f}}, FSgnjS(Float32{1.0f}, Float32{2.0f}));
+  EXPECT_EQ(std::tuple{Float32{1.0f}}, FSgnjS(Float32{-1.0f}, Float32{2.0f}));
+  EXPECT_EQ(std::tuple{Float32{-1.0f}}, FSgnjS(Float32{1.0f}, Float32{-2.0f}));
+  EXPECT_EQ(std::tuple{Float32{-1.0f}}, FSgnjS(Float32{-1.0f}, Float32{-2.0f}));
+}
+
+TEST(Intrinsics, FSgnjD) {
+  EXPECT_EQ(std::tuple{Float64{1.0}}, FSgnjD(Float64{1.0}, Float64{2.0}));
+  EXPECT_EQ(std::tuple{Float64{1.0}}, FSgnjD(Float64{-1.0}, Float64{2.0}));
+  EXPECT_EQ(std::tuple{Float64{-1.0}}, FSgnjD(Float64{1.0}, Float64{-2.0}));
+  EXPECT_EQ(std::tuple{Float64{-1.0}}, FSgnjD(Float64{-1.0}, Float64{-2.0}));
+}
+
+TEST(Intrinsics, FSgnjnS) {
+  EXPECT_EQ(std::tuple{Float32{-1.0f}}, FSgnjnS(Float32{1.0f}, Float32{2.0f}));
+  EXPECT_EQ(std::tuple{Float32{-1.0f}}, FSgnjnS(Float32{-1.0f}, Float32{2.0f}));
+  EXPECT_EQ(std::tuple{Float32{1.0f}}, FSgnjnS(Float32{1.0f}, Float32{-2.0f}));
+  EXPECT_EQ(std::tuple{Float32{1.0f}}, FSgnjnS(Float32{-1.0f}, Float32{-2.0f}));
+}
+
+TEST(Intrinsics, FSgnjnD) {
+  EXPECT_EQ(std::tuple{Float64{-1.0}}, FSgnjnD(Float64{1.0}, Float64{2.0}));
+  EXPECT_EQ(std::tuple{Float64{-1.0}}, FSgnjnD(Float64{-1.0}, Float64{2.0}));
+  EXPECT_EQ(std::tuple{Float64{1.0}}, FSgnjnD(Float64{1.0}, Float64{-2.0}));
+  EXPECT_EQ(std::tuple{Float64{1.0}}, FSgnjnD(Float64{-1.0}, Float64{-2.0}));
+}
+TEST(Intrinsics, FSgnjxS) {
+  EXPECT_EQ(std::tuple{Float32{1.0f}}, FSgnjxS(Float32{1.0f}, Float32{2.0f}));
+  EXPECT_EQ(std::tuple{Float32{-1.0f}}, FSgnjxS(Float32{-1.0f}, Float32{2.0f}));
+  EXPECT_EQ(std::tuple{Float32{-1.0f}}, FSgnjxS(Float32{1.0f}, Float32{-2.0f}));
+  EXPECT_EQ(std::tuple{Float32{1.0f}}, FSgnjxS(Float32{-1.0f}, Float32{-2.0f}));
+}
+
+TEST(Intrinsics, FSgnjxD) {
+  EXPECT_EQ(std::tuple{Float64{1.0}}, FSgnjxD(Float64{1.0}, Float64{2.0}));
+  EXPECT_EQ(std::tuple{Float64{-1.0}}, FSgnjxD(Float64{-1.0}, Float64{2.0}));
+  EXPECT_EQ(std::tuple{Float64{-1.0}}, FSgnjxD(Float64{1.0}, Float64{-2.0}));
+  EXPECT_EQ(std::tuple{Float64{1.0}}, FSgnjxD(Float64{-1.0}, Float64{-2.0}));
+}
+
+}  // namespace
+
+}  // namespace berberis::intrinsics
\ No newline at end of file
diff --git a/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics.h b/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics.h
new file mode 100644
index 0000000..7f9bb77
--- /dev/null
+++ b/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RISCV64_TO_X86_64_BERBERIS_INTRINSICS_INTRINSICS_H_
+#define RISCV64_TO_X86_64_BERBERIS_INTRINSICS_INTRINSICS_H_
+
+#include "berberis/intrinsics/common/intrinsics.h"
+
+#endif /* RISCV64_TO_X86_64_BERBERIS_INTRINSICS_INTRINSICS_H_ */
diff --git a/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h b/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h
index cdb9b28..6333260 100644
--- a/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h
+++ b/intrinsics/riscv64_to_x86_64/include/berberis/intrinsics/intrinsics_float.h
@@ -21,6 +21,7 @@
 
 #include "berberis/base/bit_util.h"
 #include "berberis/intrinsics/common/intrinsics_float.h"
+#include "berberis/intrinsics/guest_fp_flags.h"        // ToHostRoundingMode
 #include "berberis/intrinsics/guest_rounding_modes.h"  // ScopedRoundingMode
 #include "berberis/intrinsics/type_traits.h"
 
diff --git a/intrinsics/riscv64_to_x86_64/intrinsic_def.json b/intrinsics/riscv64_to_x86_64/intrinsic_def.json
new file mode 100644
index 0000000..7fa69e5
--- /dev/null
+++ b/intrinsics/riscv64_to_x86_64/intrinsic_def.json
@@ -0,0 +1,38 @@
+{
+  "FSgnjD": {
+    "comment": "Copy sign of one float to another",
+    "class": "scalar",
+    "in": [ "Float64", "Float64" ],
+    "out": [ "Float64" ]
+  },
+  "FSgnjS": {
+    "comment": "Copy sign of one float to another",
+    "class": "scalar",
+    "in": [ "Float32", "Float32" ],
+    "out": [ "Float32" ]
+  },
+  "FSgnjnD": {
+    "comment": "Copy negated sign of one float to another",
+    "class": "scalar",
+    "in": [ "Float64", "Float64" ],
+    "out": [ "Float64" ]
+  },
+  "FSgnjnS": {
+    "comment": "Copy negated sign of one float to another",
+    "class": "scalar",
+    "in": [ "Float32", "Float32" ],
+    "out": [ "Float32" ]
+  },
+  "FSgnjxD": {
+    "comment": "Xor two sign bits of two floats and return",
+    "class": "scalar",
+    "in": [ "Float64", "Float64" ],
+    "out": [ "Float64" ]
+  },
+  "FSgnjxS": {
+    "comment": "Xor two sign bits of two floats and return",
+    "class": "scalar",
+    "in": [ "Float32", "Float32" ],
+    "out": [ "Float32" ]
+  }
+}