Merge "Revert^2 "Verify profile wrt dex file in dex2oat"""
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 1475679..c50c197 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -204,10 +204,10 @@
cmd: "$(location generate-operator-out.py) art/compiler $(in) > $(out)",
tool_files: ["generate-operator-out.py"],
srcs: [
- "compiled_method.h",
"dex/dex_to_dex_compiler.h",
"driver/compiler_driver.h",
"driver/compiler_options.h",
+ "linker/linker_patch.h",
"optimizing/locations.h",
"utils/arm/constants_arm.h",
@@ -310,13 +310,14 @@
"art_gtest_defaults",
],
srcs: [
- "compiled_method_test.cc",
"debug/dwarf/dwarf_test.cc",
+ "debug/src_map_elem_test.cc",
"dex/dex_to_dex_decompiler_test.cc",
"driver/compiled_method_storage_test.cc",
"driver/compiler_driver_test.cc",
"exception_test.cc",
"jni/jni_compiler_test.cc",
+ "linker/linker_patch_test.cc",
"linker/method_bss_mapping_encoder_test.cc",
"linker/output_stream_test.cc",
"optimizing/bounds_check_elimination_test.cc",
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 0d38620..500fc4a 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -22,7 +22,7 @@
#include "base/callee_save_type.h"
#include "base/enums.h"
#include "class_linker.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "dex/quick_compiler_callbacks.h"
#include "dex/verification_results.h"
#include "driver/compiler_driver.h"
diff --git a/compiler/compiled_method-inl.h b/compiler/compiled_method-inl.h
new file mode 100644
index 0000000..c432747
--- /dev/null
+++ b/compiler/compiled_method-inl.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_COMPILED_METHOD_INL_H_
+#define ART_COMPILER_COMPILED_METHOD_INL_H_
+
+#include "compiled_method.h"
+
+#include "base/array_ref.h"
+#include "base/length_prefixed_array.h"
+#include "linker/linker_patch.h"
+
+namespace art {
+
+inline ArrayRef<const uint8_t> CompiledCode::GetQuickCode() const {
+ return GetArray(quick_code_);
+}
+
+template <typename T>
+inline ArrayRef<const T> CompiledCode::GetArray(const LengthPrefixedArray<T>* array) {
+ if (array == nullptr) {
+ return ArrayRef<const T>();
+ }
+ DCHECK_NE(array->size(), 0u);
+ return ArrayRef<const T>(&array->At(0), array->size());
+}
+
+inline ArrayRef<const uint8_t> CompiledMethod::GetMethodInfo() const {
+ return GetArray(method_info_);
+}
+
+inline ArrayRef<const uint8_t> CompiledMethod::GetVmapTable() const {
+ return GetArray(vmap_table_);
+}
+
+inline ArrayRef<const uint8_t> CompiledMethod::GetCFIInfo() const {
+ return GetArray(cfi_info_);
+}
+
+inline ArrayRef<const linker::LinkerPatch> CompiledMethod::GetPatches() const {
+ return GetArray(patches_);
+}
+
+} // namespace art
+
+#endif // ART_COMPILER_COMPILED_METHOD_INL_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index 0d9021f..111469f 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -22,7 +22,8 @@
namespace art {
-CompiledCode::CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
+CompiledCode::CompiledCode(CompilerDriver* compiler_driver,
+ InstructionSet instruction_set,
const ArrayRef<const uint8_t>& quick_code)
: compiler_driver_(compiler_driver),
instruction_set_(instruction_set),
@@ -77,8 +78,7 @@
}
}
-const void* CompiledCode::CodePointer(const void* code_pointer,
- InstructionSet instruction_set) {
+const void* CompiledCode::CodePointer(const void* code_pointer, InstructionSet instruction_set) {
switch (instruction_set) {
case kArm:
case kArm64:
@@ -108,7 +108,7 @@
const ArrayRef<const uint8_t>& method_info,
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const LinkerPatch>& patches)
+ const ArrayRef<const linker::LinkerPatch>& patches)
: CompiledCode(driver, instruction_set, quick_code),
frame_size_in_bytes_(frame_size_in_bytes),
core_spill_mask_(core_spill_mask),
@@ -129,7 +129,7 @@
const ArrayRef<const uint8_t>& method_info,
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const LinkerPatch>& patches) {
+ const ArrayRef<const linker::LinkerPatch>& patches) {
SwapAllocator<CompiledMethod> alloc(driver->GetCompiledMethodStorage()->GetSwapSpaceAllocator());
CompiledMethod* ret = alloc.allocate(1);
alloc.construct(ret,
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 5ef6cbf..892bc59 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -17,27 +17,28 @@
#ifndef ART_COMPILER_COMPILED_METHOD_H_
#define ART_COMPILER_COMPILED_METHOD_H_
-#include <iosfwd>
#include <memory>
#include <string>
#include <vector>
#include "arch/instruction_set.h"
-#include "base/array_ref.h"
-#include "base/bit_utils.h"
-#include "base/length_prefixed_array.h"
-#include "dex_file_types.h"
-#include "method_reference.h"
namespace art {
+template <typename T> class ArrayRef;
class CompilerDriver;
class CompiledMethodStorage;
+template<typename T> class LengthPrefixedArray;
+
+namespace linker {
+class LinkerPatch;
+} // namespace linker
class CompiledCode {
public:
// For Quick to supply an code blob
- CompiledCode(CompilerDriver* compiler_driver, InstructionSet instruction_set,
+ CompiledCode(CompilerDriver* compiler_driver,
+ InstructionSet instruction_set,
const ArrayRef<const uint8_t>& quick_code);
virtual ~CompiledCode();
@@ -46,9 +47,7 @@
return instruction_set_;
}
- ArrayRef<const uint8_t> GetQuickCode() const {
- return GetArray(quick_code_);
- }
+ ArrayRef<const uint8_t> GetQuickCode() const;
bool operator==(const CompiledCode& rhs) const;
@@ -66,18 +65,11 @@
// Returns a pointer suitable for invoking the code at the argument
// code_pointer address. Mainly to cope with kThumb2 where the
// lower bit must be set to indicate Thumb mode.
- static const void* CodePointer(const void* code_pointer,
- InstructionSet instruction_set);
+ static const void* CodePointer(const void* code_pointer, InstructionSet instruction_set);
protected:
template <typename T>
- static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array) {
- if (array == nullptr) {
- return ArrayRef<const T>();
- }
- DCHECK_NE(array->size(), 0u);
- return ArrayRef<const T>(&array->At(0), array->size());
- }
+ static ArrayRef<const T> GetArray(const LengthPrefixedArray<T>* array);
CompilerDriver* GetCompilerDriver() {
return compiler_driver_;
@@ -92,298 +84,6 @@
const LengthPrefixedArray<uint8_t>* const quick_code_;
};
-class SrcMapElem {
- public:
- uint32_t from_;
- int32_t to_;
-};
-
-inline bool operator<(const SrcMapElem& lhs, const SrcMapElem& rhs) {
- if (lhs.from_ != rhs.from_) {
- return lhs.from_ < rhs.from_;
- }
- return lhs.to_ < rhs.to_;
-}
-
-inline bool operator==(const SrcMapElem& lhs, const SrcMapElem& rhs) {
- return lhs.from_ == rhs.from_ && lhs.to_ == rhs.to_;
-}
-
-class LinkerPatch {
- public:
- // Note: We explicitly specify the underlying type of the enum because GCC
- // would otherwise select a bigger underlying type and then complain that
- // 'art::LinkerPatch::patch_type_' is too small to hold all
- // values of 'enum class art::LinkerPatch::Type'
- // which is ridiculous given we have only a handful of values here. If we
- // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
- // patch_type_ as an uintN_t and do explicit static_cast<>s.
- enum class Type : uint8_t {
- kMethodRelative, // NOTE: Actual patching is instruction_set-dependent.
- kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent.
- kCall,
- kCallRelative, // NOTE: Actual patching is instruction_set-dependent.
- kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
- kTypeClassTable, // NOTE: Actual patching is instruction_set-dependent.
- kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
- kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
- kStringInternTable, // NOTE: Actual patching is instruction_set-dependent.
- kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
- kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent.
- };
-
- static LinkerPatch RelativeMethodPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_method_idx) {
- LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file);
- patch.method_idx_ = target_method_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch MethodBssEntryPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_method_idx) {
- LinkerPatch patch(literal_offset, Type::kMethodBssEntry, target_dex_file);
- patch.method_idx_ = target_method_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch CodePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t target_method_idx) {
- LinkerPatch patch(literal_offset, Type::kCall, target_dex_file);
- patch.method_idx_ = target_method_idx;
- return patch;
- }
-
- static LinkerPatch RelativeCodePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t target_method_idx) {
- LinkerPatch patch(literal_offset, Type::kCallRelative, target_dex_file);
- patch.method_idx_ = target_method_idx;
- return patch;
- }
-
- static LinkerPatch RelativeTypePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_type_idx) {
- LinkerPatch patch(literal_offset, Type::kTypeRelative, target_dex_file);
- patch.type_idx_ = target_type_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch TypeClassTablePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_type_idx) {
- LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file);
- patch.type_idx_ = target_type_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch TypeBssEntryPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_type_idx) {
- LinkerPatch patch(literal_offset, Type::kTypeBssEntry, target_dex_file);
- patch.type_idx_ = target_type_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch RelativeStringPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_string_idx) {
- LinkerPatch patch(literal_offset, Type::kStringRelative, target_dex_file);
- patch.string_idx_ = target_string_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch StringInternTablePatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_string_idx) {
- LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file);
- patch.string_idx_ = target_string_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch StringBssEntryPatch(size_t literal_offset,
- const DexFile* target_dex_file,
- uint32_t pc_insn_offset,
- uint32_t target_string_idx) {
- LinkerPatch patch(literal_offset, Type::kStringBssEntry, target_dex_file);
- patch.string_idx_ = target_string_idx;
- patch.pc_insn_offset_ = pc_insn_offset;
- return patch;
- }
-
- static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset,
- uint32_t custom_value1 = 0u,
- uint32_t custom_value2 = 0u) {
- LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr);
- patch.baker_custom_value1_ = custom_value1;
- patch.baker_custom_value2_ = custom_value2;
- return patch;
- }
-
- LinkerPatch(const LinkerPatch& other) = default;
- LinkerPatch& operator=(const LinkerPatch& other) = default;
-
- size_t LiteralOffset() const {
- return literal_offset_;
- }
-
- Type GetType() const {
- return patch_type_;
- }
-
- bool IsPcRelative() const {
- switch (GetType()) {
- case Type::kMethodRelative:
- case Type::kMethodBssEntry:
- case Type::kCallRelative:
- case Type::kTypeRelative:
- case Type::kTypeClassTable:
- case Type::kTypeBssEntry:
- case Type::kStringRelative:
- case Type::kStringInternTable:
- case Type::kStringBssEntry:
- case Type::kBakerReadBarrierBranch:
- return true;
- default:
- return false;
- }
- }
-
- MethodReference TargetMethod() const {
- DCHECK(patch_type_ == Type::kMethodRelative ||
- patch_type_ == Type::kMethodBssEntry ||
- patch_type_ == Type::kCall ||
- patch_type_ == Type::kCallRelative);
- return MethodReference(target_dex_file_, method_idx_);
- }
-
- const DexFile* TargetTypeDexFile() const {
- DCHECK(patch_type_ == Type::kTypeRelative ||
- patch_type_ == Type::kTypeClassTable ||
- patch_type_ == Type::kTypeBssEntry);
- return target_dex_file_;
- }
-
- dex::TypeIndex TargetTypeIndex() const {
- DCHECK(patch_type_ == Type::kTypeRelative ||
- patch_type_ == Type::kTypeClassTable ||
- patch_type_ == Type::kTypeBssEntry);
- return dex::TypeIndex(type_idx_);
- }
-
- const DexFile* TargetStringDexFile() const {
- DCHECK(patch_type_ == Type::kStringRelative ||
- patch_type_ == Type::kStringInternTable ||
- patch_type_ == Type::kStringBssEntry);
- return target_dex_file_;
- }
-
- dex::StringIndex TargetStringIndex() const {
- DCHECK(patch_type_ == Type::kStringRelative ||
- patch_type_ == Type::kStringInternTable ||
- patch_type_ == Type::kStringBssEntry);
- return dex::StringIndex(string_idx_);
- }
-
- uint32_t PcInsnOffset() const {
- DCHECK(patch_type_ == Type::kMethodRelative ||
- patch_type_ == Type::kMethodBssEntry ||
- patch_type_ == Type::kTypeRelative ||
- patch_type_ == Type::kTypeClassTable ||
- patch_type_ == Type::kTypeBssEntry ||
- patch_type_ == Type::kStringRelative ||
- patch_type_ == Type::kStringInternTable ||
- patch_type_ == Type::kStringBssEntry);
- return pc_insn_offset_;
- }
-
- uint32_t GetBakerCustomValue1() const {
- DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
- return baker_custom_value1_;
- }
-
- uint32_t GetBakerCustomValue2() const {
- DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
- return baker_custom_value2_;
- }
-
- private:
- LinkerPatch(size_t literal_offset, Type patch_type, const DexFile* target_dex_file)
- : target_dex_file_(target_dex_file),
- literal_offset_(literal_offset),
- patch_type_(patch_type) {
- cmp1_ = 0u;
- cmp2_ = 0u;
- // The compiler rejects methods that are too big, so the compiled code
- // of a single method really shouln't be anywhere close to 16MiB.
- DCHECK(IsUint<24>(literal_offset));
- }
-
- const DexFile* target_dex_file_;
- // TODO: Clean up naming. Some patched locations are literals but others are not.
- uint32_t literal_offset_ : 24; // Method code size up to 16MiB.
- Type patch_type_ : 8;
- union {
- uint32_t cmp1_; // Used for relational operators.
- uint32_t method_idx_; // Method index for Call/Method patches.
- uint32_t type_idx_; // Type index for Type patches.
- uint32_t string_idx_; // String index for String patches.
- uint32_t baker_custom_value1_;
- static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
- static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
- static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators");
- static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators");
- };
- union {
- // Note: To avoid uninitialized padding on 64-bit systems, we use `size_t` for `cmp2_`.
- // This allows a hashing function to treat an array of linker patches as raw memory.
- size_t cmp2_; // Used for relational operators.
- // Literal offset of the insn loading PC (same as literal_offset if it's the same insn,
- // may be different if the PC-relative addressing needs multiple insns).
- uint32_t pc_insn_offset_;
- uint32_t baker_custom_value2_;
- static_assert(sizeof(pc_insn_offset_) <= sizeof(cmp2_), "needed by relational operators");
- static_assert(sizeof(baker_custom_value2_) <= sizeof(cmp2_), "needed by relational operators");
- };
-
- friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
- friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs);
-};
-std::ostream& operator<<(std::ostream& os, const LinkerPatch::Type& type);
-
-inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) {
- return lhs.literal_offset_ == rhs.literal_offset_ &&
- lhs.patch_type_ == rhs.patch_type_ &&
- lhs.target_dex_file_ == rhs.target_dex_file_ &&
- lhs.cmp1_ == rhs.cmp1_ &&
- lhs.cmp2_ == rhs.cmp2_;
-}
-
-inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) {
- return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_
- : (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_
- : (lhs.target_dex_file_ != rhs.target_dex_file_) ? lhs.target_dex_file_ < rhs.target_dex_file_
- : (lhs.cmp1_ != rhs.cmp1_) ? lhs.cmp1_ < rhs.cmp1_
- : lhs.cmp2_ < rhs.cmp2_;
-}
-
class CompiledMethod FINAL : public CompiledCode {
public:
// Constructs a CompiledMethod.
@@ -398,7 +98,7 @@
const ArrayRef<const uint8_t>& method_info,
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const LinkerPatch>& patches);
+ const ArrayRef<const linker::LinkerPatch>& patches);
virtual ~CompiledMethod();
@@ -412,7 +112,7 @@
const ArrayRef<const uint8_t>& method_info,
const ArrayRef<const uint8_t>& vmap_table,
const ArrayRef<const uint8_t>& cfi_info,
- const ArrayRef<const LinkerPatch>& patches);
+ const ArrayRef<const linker::LinkerPatch>& patches);
static void ReleaseSwapAllocatedCompiledMethod(CompilerDriver* driver, CompiledMethod* m);
@@ -428,21 +128,13 @@
return fp_spill_mask_;
}
- ArrayRef<const uint8_t> GetMethodInfo() const {
- return GetArray(method_info_);
- }
+ ArrayRef<const uint8_t> GetMethodInfo() const;
- ArrayRef<const uint8_t> GetVmapTable() const {
- return GetArray(vmap_table_);
- }
+ ArrayRef<const uint8_t> GetVmapTable() const;
- ArrayRef<const uint8_t> GetCFIInfo() const {
- return GetArray(cfi_info_);
- }
+ ArrayRef<const uint8_t> GetCFIInfo() const;
- ArrayRef<const LinkerPatch> GetPatches() const {
- return GetArray(patches_);
- }
+ ArrayRef<const linker::LinkerPatch> GetPatches() const;
private:
// For quick code, the size of the activation used by the code.
@@ -458,7 +150,7 @@
// For quick code, a FDE entry for the debug_frame section.
const LengthPrefixedArray<uint8_t>* const cfi_info_;
// For quick code, linker patches needed by the method.
- const LengthPrefixedArray<LinkerPatch>* const patches_;
+ const LengthPrefixedArray<linker::LinkerPatch>* const patches_;
};
} // namespace art
diff --git a/compiler/debug/elf_debug_line_writer.h b/compiler/debug/elf_debug_line_writer.h
index cf5d65e..49d52c4 100644
--- a/compiler/debug/elf_debug_line_writer.h
+++ b/compiler/debug/elf_debug_line_writer.h
@@ -20,10 +20,10 @@
#include <unordered_set>
#include <vector>
-#include "compiled_method.h"
#include "debug/dwarf/debug_line_opcode_writer.h"
#include "debug/dwarf/headers.h"
#include "debug/elf_compilation_unit.h"
+#include "debug/src_map_elem.h"
#include "dex_file-inl.h"
#include "linker/elf_builder.h"
#include "stack_map.h"
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
index 5678910..a8225fa 100644
--- a/compiler/debug/method_debug_info.h
+++ b/compiler/debug/method_debug_info.h
@@ -19,7 +19,8 @@
#include <string>
-#include "compiled_method.h"
+#include "arch/instruction_set.h"
+#include "base/array_ref.h"
#include "dex_file.h"
namespace art {
diff --git a/compiler/debug/src_map_elem.h b/compiler/debug/src_map_elem.h
new file mode 100644
index 0000000..5286b8c
--- /dev/null
+++ b/compiler/debug/src_map_elem.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_DEBUG_SRC_MAP_ELEM_H_
+#define ART_COMPILER_DEBUG_SRC_MAP_ELEM_H_
+
+#include <stdint.h>
+
+namespace art {
+
+class SrcMapElem {
+ public:
+ uint32_t from_;
+ int32_t to_;
+};
+
+inline bool operator<(const SrcMapElem& lhs, const SrcMapElem& rhs) {
+ if (lhs.from_ != rhs.from_) {
+ return lhs.from_ < rhs.from_;
+ }
+ return lhs.to_ < rhs.to_;
+}
+
+inline bool operator==(const SrcMapElem& lhs, const SrcMapElem& rhs) {
+ return lhs.from_ == rhs.from_ && lhs.to_ == rhs.to_;
+}
+
+} // namespace art
+
+#endif // ART_COMPILER_DEBUG_SRC_MAP_ELEM_H_
diff --git a/compiler/debug/src_map_elem_test.cc b/compiler/debug/src_map_elem_test.cc
new file mode 100644
index 0000000..ceaa53f
--- /dev/null
+++ b/compiler/debug/src_map_elem_test.cc
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include "src_map_elem.h"
+
+#include "base/macros.h"
+
+namespace art {
+namespace debug {
+
+TEST(SrcMapElem, Operators) {
+ SrcMapElem elems[] = {
+ { 1u, -1 },
+ { 1u, 0 },
+ { 1u, 1 },
+ { 2u, -1 },
+ { 2u, 0 }, // Index 4.
+ { 2u, 1 },
+ { 2u, 0u }, // Index 6: Arbitrarily add identical SrcMapElem with index 4.
+ };
+
+ for (size_t i = 0; i != arraysize(elems); ++i) {
+ for (size_t j = 0; j != arraysize(elems); ++j) {
+ bool expected = (i != 6u ? i : 4u) == (j != 6u ? j : 4u);
+ EXPECT_EQ(expected, elems[i] == elems[j]) << i << " " << j;
+ }
+ }
+
+ for (size_t i = 0; i != arraysize(elems); ++i) {
+ for (size_t j = 0; j != arraysize(elems); ++j) {
+ bool expected = (i != 6u ? i : 4u) < (j != 6u ? j : 4u);
+ EXPECT_EQ(expected, elems[i] < elems[j]) << i << " " << j;
+ }
+ }
+}
+
+} // namespace debug
+} // namespace art
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 9d57b96..e49f83f 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -395,7 +395,7 @@
ArrayRef<const uint8_t>(), // method_info
ArrayRef<const uint8_t>(quicken_data), // vmap_table
ArrayRef<const uint8_t>(), // cfi data
- ArrayRef<const LinkerPatch>());
+ ArrayRef<const linker::LinkerPatch>());
}
return nullptr;
}
diff --git a/compiler/dex/dex_to_dex_decompiler_test.cc b/compiler/dex/dex_to_dex_decompiler_test.cc
index e36d416..6637be2 100644
--- a/compiler/dex/dex_to_dex_decompiler_test.cc
+++ b/compiler/dex/dex_to_dex_decompiler_test.cc
@@ -18,7 +18,7 @@
#include "class_linker.h"
#include "common_compiler_test.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "compiler_callbacks.h"
#include "dex_file.h"
#include "driver/compiler_driver.h"
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index 528b0a2..c739333 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -21,6 +21,7 @@
#include "base/logging.h"
#include "compiled_method.h"
+#include "linker/linker_patch.h"
#include "thread-current-inl.h"
#include "utils.h"
#include "utils/dedupe_set-inl.h"
@@ -178,7 +179,7 @@
LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
dedupe_linker_patches_("dedupe cfi info",
- LengthPrefixedArrayAlloc<LinkerPatch>(swap_space_.get())) {
+ LengthPrefixedArrayAlloc<linker::LinkerPatch>(swap_space_.get())) {
}
CompiledMethodStorage::~CompiledMethodStorage() {
@@ -234,13 +235,13 @@
ReleaseArrayIfNotDeduplicated(cfi_info);
}
-const LengthPrefixedArray<LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
- const ArrayRef<const LinkerPatch>& linker_patches) {
+const LengthPrefixedArray<linker::LinkerPatch>* CompiledMethodStorage::DeduplicateLinkerPatches(
+ const ArrayRef<const linker::LinkerPatch>& linker_patches) {
return AllocateOrDeduplicateArray(linker_patches, &dedupe_linker_patches_);
}
void CompiledMethodStorage::ReleaseLinkerPatches(
- const LengthPrefixedArray<LinkerPatch>* linker_patches) {
+ const LengthPrefixedArray<linker::LinkerPatch>* linker_patches) {
ReleaseArrayIfNotDeduplicated(linker_patches);
}
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index 27011e8..249f06c 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -28,7 +28,9 @@
namespace art {
+namespace linker {
class LinkerPatch;
+} // namespace linker
class CompiledMethodStorage {
public:
@@ -61,9 +63,9 @@
const LengthPrefixedArray<uint8_t>* DeduplicateCFIInfo(const ArrayRef<const uint8_t>& cfi_info);
void ReleaseCFIInfo(const LengthPrefixedArray<uint8_t>* cfi_info);
- const LengthPrefixedArray<LinkerPatch>* DeduplicateLinkerPatches(
- const ArrayRef<const LinkerPatch>& linker_patches);
- void ReleaseLinkerPatches(const LengthPrefixedArray<LinkerPatch>* linker_patches);
+ const LengthPrefixedArray<linker::LinkerPatch>* DeduplicateLinkerPatches(
+ const ArrayRef<const linker::LinkerPatch>& linker_patches);
+ void ReleaseLinkerPatches(const LengthPrefixedArray<linker::LinkerPatch>* linker_patches);
private:
template <typename T, typename DedupeSetType>
@@ -98,7 +100,7 @@
ArrayDedupeSet<uint8_t> dedupe_method_info_;
ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
- ArrayDedupeSet<LinkerPatch> dedupe_linker_patches_;
+ ArrayDedupeSet<linker::LinkerPatch> dedupe_linker_patches_;
DISALLOW_COPY_AND_ASSIGN(CompiledMethodStorage);
};
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 2ec2af5..e1ea630 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -18,7 +18,7 @@
#include <gtest/gtest.h>
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "compiler_driver.h"
#include "compiler_options.h"
#include "dex/verification_results.h"
@@ -70,17 +70,17 @@
ArrayRef<const uint8_t>(raw_cfi_info1),
ArrayRef<const uint8_t>(raw_cfi_info2),
};
- const LinkerPatch raw_patches1[] = {
- LinkerPatch::CodePatch(0u, nullptr, 1u),
- LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u),
+ const linker::LinkerPatch raw_patches1[] = {
+ linker::LinkerPatch::CodePatch(0u, nullptr, 1u),
+ linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u),
};
- const LinkerPatch raw_patches2[] = {
- LinkerPatch::CodePatch(0u, nullptr, 1u),
- LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u),
+ const linker::LinkerPatch raw_patches2[] = {
+ linker::LinkerPatch::CodePatch(0u, nullptr, 1u),
+ linker::LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u),
};
- ArrayRef<const LinkerPatch> patches[] = {
- ArrayRef<const LinkerPatch>(raw_patches1),
- ArrayRef<const LinkerPatch>(raw_patches2),
+ ArrayRef<const linker::LinkerPatch> patches[] = {
+ ArrayRef<const linker::LinkerPatch>(raw_patches1),
+ ArrayRef<const linker::LinkerPatch>(raw_patches2),
};
std::vector<CompiledMethod*> compiled_methods;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 678f090..03d8ef5 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -37,7 +37,7 @@
#include "base/time_utils.h"
#include "base/timing_logger.h"
#include "class_linker-inl.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "compiler.h"
#include "compiler_callbacks.h"
#include "compiler_driver-inl.h"
@@ -55,6 +55,7 @@
#include "handle_scope-inl.h"
#include "intrinsics_enum.h"
#include "jni_internal.h"
+#include "linker/linker_patch.h"
#include "mirror/class-inl.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache-inl.h"
@@ -618,7 +619,7 @@
if (compiled_method != nullptr) {
// Count non-relative linker patches.
size_t non_relative_linker_patch_count = 0u;
- for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+ for (const linker::LinkerPatch& patch : compiled_method->GetPatches()) {
if (!patch.IsPcRelative()) {
++non_relative_linker_patch_count;
}
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index e7e4647..c66a2a6 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -665,7 +665,7 @@
/* method_info */ ArrayRef<const uint8_t>(),
/* vmap_table */ ArrayRef<const uint8_t>(),
ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
- ArrayRef<const LinkerPatch>());
+ ArrayRef<const linker::LinkerPatch>());
}
// Copy a single parameter from the managed to the JNI calling convention.
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index cb6522c..2cb23d1 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -17,9 +17,10 @@
#include "linker/arm/relative_patcher_arm_base.h"
#include "base/stl_util.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "debug/method_debug_info.h"
#include "dex_file_types.h"
+#include "linker/linker_patch.h"
#include "linker/output_stream.h"
#include "oat.h"
#include "oat_quick_method_header.h"
diff --git a/compiler/linker/arm/relative_patcher_thumb2.cc b/compiler/linker/arm/relative_patcher_thumb2.cc
index 704feeb..f84fea3 100644
--- a/compiler/linker/arm/relative_patcher_thumb2.cc
+++ b/compiler/linker/arm/relative_patcher_thumb2.cc
@@ -21,6 +21,7 @@
#include "base/bit_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object.h"
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 82f502a..828c99b 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -20,10 +20,11 @@
#include "arch/arm64/instruction_set_features_arm64.h"
#include "art_method.h"
#include "base/bit_utils.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "driver/compiler_driver.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "heap_poisoning.h"
+#include "linker/linker_patch.h"
#include "linker/output_stream.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
diff --git a/compiler/linker/linker_patch.h b/compiler/linker/linker_patch.h
new file mode 100644
index 0000000..0ac1490
--- /dev/null
+++ b/compiler/linker/linker_patch.h
@@ -0,0 +1,311 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_LINKER_PATCH_H_
+#define ART_COMPILER_LINKER_LINKER_PATCH_H_
+
+#include <iosfwd>
+#include <stdint.h>
+
+#include "base/bit_utils.h"
+#include "base/logging.h"
+#include "method_reference.h"
+
+namespace art {
+
+class DexFile;
+
+namespace linker {
+
+class LinkerPatch {
+ public:
+ // Note: We explicitly specify the underlying type of the enum because GCC
+ // would otherwise select a bigger underlying type and then complain that
+ // 'art::LinkerPatch::patch_type_' is too small to hold all
+ // values of 'enum class art::LinkerPatch::Type'
+ // which is ridiculous given we have only a handful of values here. If we
+ // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
+ // patch_type_ as an uintN_t and do explicit static_cast<>s.
+ enum class Type : uint8_t {
+ kMethodRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kMethodBssEntry, // NOTE: Actual patching is instruction_set-dependent.
+ kCall,
+ kCallRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kTypeRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kTypeClassTable, // NOTE: Actual patching is instruction_set-dependent.
+ kTypeBssEntry, // NOTE: Actual patching is instruction_set-dependent.
+ kStringRelative, // NOTE: Actual patching is instruction_set-dependent.
+ kStringInternTable, // NOTE: Actual patching is instruction_set-dependent.
+ kStringBssEntry, // NOTE: Actual patching is instruction_set-dependent.
+ kBakerReadBarrierBranch, // NOTE: Actual patching is instruction_set-dependent.
+ };
+
+ static LinkerPatch RelativeMethodPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_method_idx) {
+ LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch MethodBssEntryPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_method_idx) {
+ LinkerPatch patch(literal_offset, Type::kMethodBssEntry, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch CodePatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t target_method_idx) {
+ LinkerPatch patch(literal_offset, Type::kCall, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ return patch;
+ }
+
+ static LinkerPatch RelativeCodePatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t target_method_idx) {
+ LinkerPatch patch(literal_offset, Type::kCallRelative, target_dex_file);
+ patch.method_idx_ = target_method_idx;
+ return patch;
+ }
+
+ static LinkerPatch RelativeTypePatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_type_idx) {
+ LinkerPatch patch(literal_offset, Type::kTypeRelative, target_dex_file);
+ patch.type_idx_ = target_type_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch TypeClassTablePatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_type_idx) {
+ LinkerPatch patch(literal_offset, Type::kTypeClassTable, target_dex_file);
+ patch.type_idx_ = target_type_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch TypeBssEntryPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_type_idx) {
+ LinkerPatch patch(literal_offset, Type::kTypeBssEntry, target_dex_file);
+ patch.type_idx_ = target_type_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch RelativeStringPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_string_idx) {
+ LinkerPatch patch(literal_offset, Type::kStringRelative, target_dex_file);
+ patch.string_idx_ = target_string_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch StringInternTablePatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_string_idx) {
+ LinkerPatch patch(literal_offset, Type::kStringInternTable, target_dex_file);
+ patch.string_idx_ = target_string_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch StringBssEntryPatch(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t target_string_idx) {
+ LinkerPatch patch(literal_offset, Type::kStringBssEntry, target_dex_file);
+ patch.string_idx_ = target_string_idx;
+ patch.pc_insn_offset_ = pc_insn_offset;
+ return patch;
+ }
+
+ static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset,
+ uint32_t custom_value1 = 0u,
+ uint32_t custom_value2 = 0u) {
+ LinkerPatch patch(literal_offset, Type::kBakerReadBarrierBranch, nullptr);
+ patch.baker_custom_value1_ = custom_value1;
+ patch.baker_custom_value2_ = custom_value2;
+ return patch;
+ }
+
+ LinkerPatch(const LinkerPatch& other) = default;
+ LinkerPatch& operator=(const LinkerPatch& other) = default;
+
+ size_t LiteralOffset() const {
+ return literal_offset_;
+ }
+
+ Type GetType() const {
+ return patch_type_;
+ }
+
+ bool IsPcRelative() const {
+ switch (GetType()) {
+ case Type::kMethodRelative:
+ case Type::kMethodBssEntry:
+ case Type::kCallRelative:
+ case Type::kTypeRelative:
+ case Type::kTypeClassTable:
+ case Type::kTypeBssEntry:
+ case Type::kStringRelative:
+ case Type::kStringInternTable:
+ case Type::kStringBssEntry:
+ case Type::kBakerReadBarrierBranch:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ MethodReference TargetMethod() const {
+ DCHECK(patch_type_ == Type::kMethodRelative ||
+ patch_type_ == Type::kMethodBssEntry ||
+ patch_type_ == Type::kCall ||
+ patch_type_ == Type::kCallRelative);
+ return MethodReference(target_dex_file_, method_idx_);
+ }
+
+ const DexFile* TargetTypeDexFile() const {
+ DCHECK(patch_type_ == Type::kTypeRelative ||
+ patch_type_ == Type::kTypeClassTable ||
+ patch_type_ == Type::kTypeBssEntry);
+ return target_dex_file_;
+ }
+
+ dex::TypeIndex TargetTypeIndex() const {
+ DCHECK(patch_type_ == Type::kTypeRelative ||
+ patch_type_ == Type::kTypeClassTable ||
+ patch_type_ == Type::kTypeBssEntry);
+ return dex::TypeIndex(type_idx_);
+ }
+
+ const DexFile* TargetStringDexFile() const {
+ DCHECK(patch_type_ == Type::kStringRelative ||
+ patch_type_ == Type::kStringInternTable ||
+ patch_type_ == Type::kStringBssEntry);
+ return target_dex_file_;
+ }
+
+ dex::StringIndex TargetStringIndex() const {
+ DCHECK(patch_type_ == Type::kStringRelative ||
+ patch_type_ == Type::kStringInternTable ||
+ patch_type_ == Type::kStringBssEntry);
+ return dex::StringIndex(string_idx_);
+ }
+
+ uint32_t PcInsnOffset() const {
+ DCHECK(patch_type_ == Type::kMethodRelative ||
+ patch_type_ == Type::kMethodBssEntry ||
+ patch_type_ == Type::kTypeRelative ||
+ patch_type_ == Type::kTypeClassTable ||
+ patch_type_ == Type::kTypeBssEntry ||
+ patch_type_ == Type::kStringRelative ||
+ patch_type_ == Type::kStringInternTable ||
+ patch_type_ == Type::kStringBssEntry);
+ return pc_insn_offset_;
+ }
+
+ uint32_t GetBakerCustomValue1() const {
+ DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
+ return baker_custom_value1_;
+ }
+
+ uint32_t GetBakerCustomValue2() const {
+ DCHECK(patch_type_ == Type::kBakerReadBarrierBranch);
+ return baker_custom_value2_;
+ }
+
+ private:
+ LinkerPatch(size_t literal_offset, Type patch_type, const DexFile* target_dex_file)
+ : target_dex_file_(target_dex_file),
+ literal_offset_(literal_offset),
+ patch_type_(patch_type) {
+ cmp1_ = 0u;
+ cmp2_ = 0u;
+ // The compiler rejects methods that are too big, so the compiled code
+ // of a single method really shouln't be anywhere close to 16MiB.
+ DCHECK(IsUint<24>(literal_offset));
+ }
+
+ const DexFile* target_dex_file_;
+ // TODO: Clean up naming. Some patched locations are literals but others are not.
+ uint32_t literal_offset_ : 24; // Method code size up to 16MiB.
+ Type patch_type_ : 8;
+ union {
+ uint32_t cmp1_; // Used for relational operators.
+ uint32_t method_idx_; // Method index for Call/Method patches.
+ uint32_t type_idx_; // Type index for Type patches.
+ uint32_t string_idx_; // String index for String patches.
+ uint32_t baker_custom_value1_;
+ static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
+ static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
+ static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators");
+ static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators");
+ };
+ union {
+ // Note: To avoid uninitialized padding on 64-bit systems, we use `size_t` for `cmp2_`.
+ // This allows a hashing function to treat an array of linker patches as raw memory.
+ size_t cmp2_; // Used for relational operators.
+ // Literal offset of the insn loading PC (same as literal_offset if it's the same insn,
+ // may be different if the PC-relative addressing needs multiple insns).
+ uint32_t pc_insn_offset_;
+ uint32_t baker_custom_value2_;
+ static_assert(sizeof(pc_insn_offset_) <= sizeof(cmp2_), "needed by relational operators");
+ static_assert(sizeof(baker_custom_value2_) <= sizeof(cmp2_), "needed by relational operators");
+ };
+
+ friend bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs);
+ friend bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs);
+};
+std::ostream& operator<<(std::ostream& os, const LinkerPatch::Type& type);
+
+inline bool operator==(const LinkerPatch& lhs, const LinkerPatch& rhs) {
+ return lhs.literal_offset_ == rhs.literal_offset_ &&
+ lhs.patch_type_ == rhs.patch_type_ &&
+ lhs.target_dex_file_ == rhs.target_dex_file_ &&
+ lhs.cmp1_ == rhs.cmp1_ &&
+ lhs.cmp2_ == rhs.cmp2_;
+}
+
+inline bool operator<(const LinkerPatch& lhs, const LinkerPatch& rhs) {
+ return (lhs.literal_offset_ != rhs.literal_offset_) ? lhs.literal_offset_ < rhs.literal_offset_
+ : (lhs.patch_type_ != rhs.patch_type_) ? lhs.patch_type_ < rhs.patch_type_
+ : (lhs.target_dex_file_ != rhs.target_dex_file_) ? lhs.target_dex_file_ < rhs.target_dex_file_
+ : (lhs.cmp1_ != rhs.cmp1_) ? lhs.cmp1_ < rhs.cmp1_
+ : lhs.cmp2_ < rhs.cmp2_;
+}
+
+} // namespace linker
+} // namespace art
+
+#endif // ART_COMPILER_LINKER_LINKER_PATCH_H_
diff --git a/compiler/compiled_method_test.cc b/compiler/linker/linker_patch_test.cc
similarity index 90%
rename from compiler/compiled_method_test.cc
rename to compiler/linker/linker_patch_test.cc
index f4a72cf..e87dc8d 100644
--- a/compiler/compiled_method_test.cc
+++ b/compiler/linker/linker_patch_test.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2015 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,37 +16,12 @@
#include <gtest/gtest.h>
-#include "compiled_method.h"
+#include "linker_patch.h"
namespace art {
+namespace linker {
-TEST(CompiledMethod, SrcMapElemOperators) {
- SrcMapElem elems[] = {
- { 1u, -1 },
- { 1u, 0 },
- { 1u, 1 },
- { 2u, -1 },
- { 2u, 0 }, // Index 4.
- { 2u, 1 },
- { 2u, 0u }, // Index 6: Arbitrarily add identical SrcMapElem with index 4.
- };
-
- for (size_t i = 0; i != arraysize(elems); ++i) {
- for (size_t j = 0; j != arraysize(elems); ++j) {
- bool expected = (i != 6u ? i : 4u) == (j != 6u ? j : 4u);
- EXPECT_EQ(expected, elems[i] == elems[j]) << i << " " << j;
- }
- }
-
- for (size_t i = 0; i != arraysize(elems); ++i) {
- for (size_t j = 0; j != arraysize(elems); ++j) {
- bool expected = (i != 6u ? i : 4u) < (j != 6u ? j : 4u);
- EXPECT_EQ(expected, elems[i] < elems[j]) << i << " " << j;
- }
- }
-}
-
-TEST(CompiledMethod, LinkerPatchOperators) {
+TEST(LinkerPatch, LinkerPatchOperators) {
const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1);
const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2);
LinkerPatch patches[] = {
@@ -191,4 +166,5 @@
}
}
+} // namespace linker
} // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 408ac22..69e0846 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -18,6 +18,7 @@
#include "compiled_method.h"
#include "debug/method_debug_info.h"
+#include "linker/linker_patch.h"
namespace art {
namespace linker {
diff --git a/compiler/linker/mips64/relative_patcher_mips64.cc b/compiler/linker/mips64/relative_patcher_mips64.cc
index 2bcd98a..aae5746 100644
--- a/compiler/linker/mips64/relative_patcher_mips64.cc
+++ b/compiler/linker/mips64/relative_patcher_mips64.cc
@@ -18,6 +18,7 @@
#include "compiled_method.h"
#include "debug/method_debug_info.h"
+#include "linker/linker_patch.h"
namespace art {
namespace linker {
diff --git a/compiler/linker/relative_patcher.h b/compiler/linker/relative_patcher.h
index e079946..548e128 100644
--- a/compiler/linker/relative_patcher.h
+++ b/compiler/linker/relative_patcher.h
@@ -28,7 +28,6 @@
namespace art {
class CompiledMethod;
-class LinkerPatch;
namespace debug {
struct MethodDebugInfo;
@@ -36,6 +35,7 @@
namespace linker {
+class LinkerPatch;
class OutputStream;
/**
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index f7dbc1e..6297dd0 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -21,7 +21,7 @@
#include "arch/instruction_set_features.h"
#include "base/array_ref.h"
#include "base/macros.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "dex/verification_results.h"
#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
diff --git a/compiler/linker/x86/relative_patcher_x86.cc b/compiler/linker/x86/relative_patcher_x86.cc
index 6967b0b..cdd2cef 100644
--- a/compiler/linker/x86/relative_patcher_x86.cc
+++ b/compiler/linker/x86/relative_patcher_x86.cc
@@ -17,6 +17,7 @@
#include "linker/x86/relative_patcher_x86.h"
#include "compiled_method.h"
+#include "linker/linker_patch.h"
namespace art {
namespace linker {
diff --git a/compiler/linker/x86_64/relative_patcher_x86_64.cc b/compiler/linker/x86_64/relative_patcher_x86_64.cc
index 156ece9..9633564 100644
--- a/compiler/linker/x86_64/relative_patcher_x86_64.cc
+++ b/compiler/linker/x86_64/relative_patcher_x86_64.cc
@@ -17,6 +17,7 @@
#include "linker/x86_64/relative_patcher_x86_64.h"
#include "compiled_method.h"
+#include "linker/linker_patch.h"
namespace art {
namespace linker {
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 1e5f1ec..6533e2b 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -288,7 +288,8 @@
GetAssembler()->FinalizeInstructions(code);
}
-void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) {
+void CodeGenerator::EmitLinkerPatches(
+ ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) {
// No linker patches by default.
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 30c2b52..4b4abdf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -61,9 +61,12 @@
class CodeGenerator;
class CompilerDriver;
class CompilerOptions;
-class LinkerPatch;
class ParallelMoveResolver;
+namespace linker {
+class LinkerPatch;
+} // namespace linker
+
class CodeAllocator {
public:
CodeAllocator() {}
@@ -205,7 +208,7 @@
virtual void Initialize() = 0;
virtual void Finalize(CodeAllocator* allocator);
- virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches);
+ virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
virtual void GenerateFrameEntry() = 0;
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 8814cfc..aaea7c1 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -31,6 +31,7 @@
#include "intrinsics.h"
#include "intrinsics_arm64.h"
#include "linker/arm64/relative_patcher_arm64.h"
+#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
@@ -4754,10 +4755,10 @@
__ ldr(out, MemOperand(base, /* offset placeholder */ 0));
}
-template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches) {
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
for (const PcRelativePatchInfo& info : infos) {
linker_patches->push_back(Factory(info.label.GetLocation(),
&info.target_dex_file,
@@ -4766,7 +4767,7 @@
}
}
-void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
pc_relative_method_patches_.size() +
@@ -4778,28 +4779,28 @@
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ pc_relative_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ pc_relative_string_patches_, linker_patches);
} else {
DCHECK(pc_relative_method_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
+ pc_relative_string_patches_, linker_patches);
}
- EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
- linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
- info.custom_data));
+ linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
+ info.label.GetLocation(), info.custom_data));
}
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 69c5119..cebdaa1 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -627,7 +627,7 @@
vixl::aarch64::Register out,
vixl::aarch64::Register base);
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
@@ -805,9 +805,9 @@
void EmitJumpTables();
- template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches);
+ ArenaVector<linker::LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
// We use a deque so that the `vixl::aarch64::Label` objects do not move in memory.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index baf68c4..e1ea080 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -30,6 +30,7 @@
#include "heap_poisoning.h"
#include "intrinsics_arm_vixl.h"
#include "linker/arm/relative_patcher_thumb2.h"
+#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "thread.h"
@@ -9191,10 +9192,10 @@
});
}
-template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches) {
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
for (const PcRelativePatchInfo& info : infos) {
const DexFile& dex_file = info.target_dex_file;
size_t offset_or_index = info.offset_or_index;
@@ -9211,7 +9212,7 @@
}
}
-void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
/* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
@@ -9223,28 +9224,28 @@
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ pc_relative_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ pc_relative_string_patches_, linker_patches);
} else {
DCHECK(pc_relative_method_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
+ pc_relative_string_patches_, linker_patches);
}
- EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
- linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(),
- info.custom_data));
+ linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch(
+ info.label.GetLocation(), info.custom_data));
}
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index e78bc15..337ecf1 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -594,7 +594,7 @@
dex::TypeIndex type_index,
Handle<mirror::Class> handle);
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
@@ -778,9 +778,9 @@
PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file,
uint32_t offset_or_index,
ArenaDeque<PcRelativePatchInfo>* patches);
- template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches);
+ ArenaVector<linker::LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
// We use a deque so that the `vixl::aarch32::Label` objects do not move in memory.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 6256722..8ada76a 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -29,6 +29,7 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_mips.h"
+#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
@@ -1628,10 +1629,10 @@
}
}
-template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches) {
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
for (const PcRelativePatchInfo& info : infos) {
const DexFile& dex_file = info.target_dex_file;
size_t offset_or_index = info.offset_or_index;
@@ -1647,7 +1648,7 @@
}
}
-void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
pc_relative_method_patches_.size() +
@@ -1658,25 +1659,25 @@
string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ pc_relative_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ pc_relative_string_patches_, linker_patches);
} else {
DCHECK(pc_relative_method_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
+ pc_relative_string_patches_, linker_patches);
}
- EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index f15f8c6..2b1075d 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -395,7 +395,7 @@
const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
// Emit linker patches.
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
// Fast path implementation of ReadBarrier::Barrier for a heap
@@ -679,9 +679,9 @@
const PcRelativePatchInfo* info_high,
ArenaDeque<PcRelativePatchInfo>* patches);
- template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches);
+ ArenaVector<linker::LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
MipsLabel* block_labels_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index e8ae2db..119e0f6 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -27,6 +27,7 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_mips64.h"
+#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
@@ -1541,10 +1542,10 @@
}
}
-template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches) {
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
for (const PcRelativePatchInfo& info : infos) {
const DexFile& dex_file = info.target_dex_file;
size_t offset_or_index = info.offset_or_index;
@@ -1556,7 +1557,7 @@
}
}
-void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
pc_relative_method_patches_.size() +
@@ -1567,25 +1568,25 @@
string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ pc_relative_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ pc_relative_string_patches_, linker_patches);
} else {
DCHECK(pc_relative_method_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(pc_relative_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(pc_relative_string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
+ pc_relative_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
+ pc_relative_string_patches_, linker_patches);
}
- EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 3035621..9fe47ee 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -374,7 +374,7 @@
const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
// Emit linker patches.
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
// Fast path implementation of ReadBarrier::Barrier for a heap
@@ -643,9 +643,9 @@
const PcRelativePatchInfo* info_high,
ArenaDeque<PcRelativePatchInfo>* patches);
- template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches);
+ ArenaVector<linker::LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
Mips64Label* block_labels_; // Indexed by block id.
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 18a55c8..3f576c8 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -949,20 +949,18 @@
}
}
-void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -971,18 +969,25 @@
}
}
+void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However vector MultiplyAccumulate instruction is not affected.
-void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VRegister acc = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VRegister left = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VRegister right = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.V16B(), left.V16B(), right.V16B());
} else {
__ Mls(acc.V16B(), left.V16B(), right.V16B());
@@ -990,16 +995,16 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.V8H(), left.V8H(), right.V8H());
} else {
__ Mls(acc.V8H(), left.V8H(), right.V8H());
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ Mla(acc.V4S(), left.V4S(), right.V4S());
} else {
__ Mls(acc.V4S(), left.V4S(), right.V4S());
@@ -1007,6 +1012,186 @@
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+ // Some conversions require temporary registers.
+ LocationSummary* locations = instruction->GetLocations();
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
+ switch (a->GetPackedType()) {
+ case Primitive::kPrimByte:
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ FALLTHROUGH_INTENDED;
+ case Primitive::kPrimInt:
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ break;
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ if (instruction->GetPackedType() == Primitive::kPrimLong) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ break;
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ if (instruction->GetPackedType() == a->GetPackedType()) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+
+ // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(a->GetPackedType(), b->GetPackedType());
+ switch (a->GetPackedType()) {
+ case Primitive::kPrimByte:
+ DCHECK_EQ(16u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sabal(acc.V8H(), left.V8B(), right.V8B());
+ __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
+ break;
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ __ Sxtl(tmp1.V8H(), left.V8B());
+ __ Sxtl(tmp2.V8H(), right.V8B());
+ __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
+ __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
+ __ Sxtl2(tmp1.V8H(), left.V16B());
+ __ Sxtl2(tmp2.V8H(), right.V16B());
+ __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
+ __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
+ break;
+ }
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
+ VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
+ __ Sxtl(tmp1.V8H(), left.V8B());
+ __ Sxtl(tmp2.V8H(), right.V8B());
+ __ Sxtl(tmp3.V4S(), tmp1.V4H());
+ __ Sxtl(tmp4.V4S(), tmp2.V4H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp3.V4S(), tmp1.V8H());
+ __ Sxtl2(tmp4.V4S(), tmp2.V8H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp1.V8H(), left.V16B());
+ __ Sxtl2(tmp2.V8H(), right.V16B());
+ __ Sxtl(tmp3.V4S(), tmp1.V4H());
+ __ Sxtl(tmp4.V4S(), tmp2.V4H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ __ Sxtl2(tmp3.V4S(), tmp1.V8H());
+ __ Sxtl2(tmp4.V4S(), tmp2.V8H());
+ __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
+ __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Sabal(acc.V4S(), left.V4H(), right.V4H());
+ __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
+ break;
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
+ VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
+ __ Sxtl(tmp1.V4S(), left.V4H());
+ __ Sxtl(tmp2.V4S(), right.V4H());
+ __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
+ __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
+ __ Sxtl2(tmp1.V4S(), left.V8H());
+ __ Sxtl2(tmp2.V4S(), right.V8H());
+ __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
+ __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case Primitive::kPrimInt:
+ DCHECK_EQ(4u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimInt: {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Sub(tmp.V4S(), left.V4S(), right.V4S());
+ __ Abs(tmp.V4S(), tmp.V4S());
+ __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
+ break;
+ }
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ __ Sabal(acc.V2D(), left.V2S(), right.V2S());
+ __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ case Primitive::kPrimLong:
+ DCHECK_EQ(2u, a->GetVectorLength());
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimLong: {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Sub(tmp.V2D(), left.V2D(), right.V2D());
+ __ Abs(tmp.V2D(), tmp.V2D());
+ __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
}
}
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 7a11dff..069054c 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -629,12 +629,40 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index c2fbf7f..0bedafc 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -826,21 +826,18 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -849,18 +846,19 @@
}
}
-void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VectorRegister acc =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VectorRegister left =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VectorRegister right =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvB(acc, left, right);
} else {
__ MsubvB(acc, left, right);
@@ -868,24 +866,24 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvH(acc, left, right);
} else {
__ MsubvH(acc, left, right);
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvW(acc, left, right);
} else {
__ MsubvW(acc, left, right);
}
break;
case Primitive::kPrimLong:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvD(acc, left, right);
} else {
__ MsubvD(acc, left, right);
@@ -897,6 +895,15 @@
}
}
+void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ // TODO: implement this, location helper already filled out (shared with MulAcc).
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* arena,
HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 9d3a777..db31bdc 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -830,21 +830,18 @@
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
-void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr);
- switch (instr->GetPackedType()) {
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
case Primitive::kPrimChar:
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimLong:
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister());
- locations->SetInAt(
- HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister());
- DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
locations->SetOut(Location::SameAsFirstInput());
break;
default:
@@ -853,18 +850,19 @@
}
}
-void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LocationSummary* locations = instr->GetLocations();
- VectorRegister acc =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex));
- VectorRegister left =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex));
- VectorRegister right =
- VectorRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex));
- switch (instr->GetPackedType()) {
+void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VectorRegister acc = VectorRegisterFrom(locations->InAt(0));
+ VectorRegister left = VectorRegisterFrom(locations->InAt(1));
+ VectorRegister right = VectorRegisterFrom(locations->InAt(2));
+ switch (instruction->GetPackedType()) {
case Primitive::kPrimByte:
- DCHECK_EQ(16u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvB(acc, left, right);
} else {
__ MsubvB(acc, left, right);
@@ -872,24 +870,24 @@
break;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- DCHECK_EQ(8u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvH(acc, left, right);
} else {
__ MsubvH(acc, left, right);
}
break;
case Primitive::kPrimInt:
- DCHECK_EQ(4u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvW(acc, left, right);
} else {
__ MsubvW(acc, left, right);
}
break;
case Primitive::kPrimLong:
- DCHECK_EQ(2u, instr->GetVectorLength());
- if (instr->GetOpKind() == HInstruction::kAdd) {
+ DCHECK_EQ(2u, instruction->GetVectorLength());
+ if (instruction->GetOpKind() == HInstruction::kAdd) {
__ MaddvD(acc, left, right);
} else {
__ MsubvD(acc, left, right);
@@ -901,6 +899,15 @@
}
}
+void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ // TODO: implement this, location helper already filled out (shared with MulAcc).
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* arena,
HVecMemoryOperation* instruction,
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 37190f8..5a012e7 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -51,7 +51,6 @@
: Location::RequiresFpuRegister());
locations->SetOut(is_zero ? Location::RequiresFpuRegister()
: Location::SameAsFirstInput());
-
break;
default:
LOG(FATAL) << "Unsupported SIMD type";
@@ -1033,12 +1032,42 @@
}
}
-void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
}
-void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
- LOG(FATAL) << "No SIMD for " << instr->GetId();
+void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ // TODO: pmaddwd?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ // TODO: psadbw for unsigned?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
// Helper to set up locations for vector memory operations.
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index edd0209..3698b7f 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -1005,11 +1005,41 @@
}
}
+// Helper to set up locations for vector accumulations.
+static void CreateVecAccumLocations(ArenaAllocator* arena, HVecOperation* instruction) {
+ LocationSummary* locations = new (arena) LocationSummary(instruction);
+ switch (instruction->GetPackedType()) {
+ case Primitive::kPrimByte:
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ case Primitive::kPrimInt:
+ case Primitive::kPrimLong:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type";
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
- LOG(FATAL) << "No SIMD for " << instruction->GetId();
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
}
void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
+ // TODO: pmaddwd?
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ CreateVecAccumLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
+ // TODO: psadbw for unsigned?
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b9130f..99581ee 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,6 +26,7 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86.h"
+#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
@@ -4675,10 +4676,10 @@
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
-template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
const ArenaDeque<X86PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches) {
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
for (const X86PcRelativePatchInfo& info : infos) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(Factory(
@@ -4686,7 +4687,7 @@
}
}
-void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
boot_image_method_patches_.size() +
@@ -4697,24 +4698,25 @@
string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ boot_image_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ boot_image_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ string_patches_, linker_patches);
} else {
DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(boot_image_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
+ boot_image_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
+ string_patches_, linker_patches);
}
- EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index b32d57a..e8f919d 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -431,7 +431,7 @@
void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
// Emit linker patches.
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
void PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
@@ -617,9 +617,9 @@
HX86ComputeBaseMethodAddress* method_address;
};
- template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos,
- ArenaVector<LinkerPatch>* linker_patches);
+ ArenaVector<linker::LinkerPatch>* linker_patches);
Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 39a6580..65b3f62 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -25,6 +25,7 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
+#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
@@ -1106,10 +1107,10 @@
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
-template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
const ArenaDeque<PatchInfo<Label>>& infos,
- ArenaVector<LinkerPatch>* linker_patches) {
+ ArenaVector<linker::LinkerPatch>* linker_patches) {
for (const PatchInfo<Label>& info : infos) {
uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(
@@ -1117,7 +1118,7 @@
}
}
-void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
+void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
boot_image_method_patches_.size() +
@@ -1128,24 +1129,25 @@
string_bss_entry_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
+ boot_image_method_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
+ boot_image_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
+ string_patches_, linker_patches);
} else {
DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeClassTablePatch>(boot_image_type_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringInternTablePatch>(string_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
+ boot_image_type_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
+ string_patches_, linker_patches);
}
- EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
- linker_patches);
- EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_bss_entry_patches_,
- linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
+ method_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
+ type_bss_entry_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
+ string_bss_entry_patches_, linker_patches);
DCHECK_EQ(size, linker_patches->size());
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index f5fa86b..8e8e695 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -424,7 +424,7 @@
void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
void PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
@@ -586,9 +586,9 @@
static constexpr int32_t kDummy32BitOffset = 256;
private:
- template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
+ template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos,
- ArenaVector<LinkerPatch>* linker_patches);
+ ArenaVector<linker::LinkerPatch>* linker_patches);
// Labels for each block that will be compiled.
Label* block_labels_; // Indexed by block id.
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index baa0453..6f8743b 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -71,10 +71,13 @@
return false;
}
-// Detect a sign extension from the given type. Returns the promoted operand on success.
+// Detect a sign extension in instruction from the given type. The to64 parameter
+// denotes if result is long, and thus sign extension from int can be included.
+// Returns the promoted operand on success.
static bool IsSignExtensionAndGet(HInstruction* instruction,
Primitive::Type type,
- /*out*/ HInstruction** operand) {
+ /*out*/ HInstruction** operand,
+ bool to64 = false) {
// Accept any already wider constant that would be handled properly by sign
// extension when represented in the *width* of the given narrower data type
// (the fact that char normally zero extends does not matter here).
@@ -82,20 +85,24 @@
if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
- if (std::numeric_limits<int8_t>::min() <= value &&
- std::numeric_limits<int8_t>::max() >= value) {
+ if (IsInt<8>(value)) {
*operand = instruction;
return true;
}
return false;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- if (std::numeric_limits<int16_t>::min() <= value &&
- std::numeric_limits<int16_t>::max() <= value) {
+ if (IsInt<16>(value)) {
*operand = instruction;
return true;
}
return false;
+ case Primitive::kPrimInt:
+ if (IsInt<32>(value)) {
+ *operand = instruction;
+ return to64;
+ }
+ return false;
default:
return false;
}
@@ -110,40 +117,52 @@
case Primitive::kPrimShort:
*operand = instruction;
return true;
+ case Primitive::kPrimInt:
+ *operand = instruction;
+ return to64;
default:
return false;
}
}
- // TODO: perhaps explicit conversions later too?
- // (this may return something different from instruction)
+ // Explicit type conversion to long.
+ if (instruction->IsTypeConversion() && instruction->GetType() == Primitive::kPrimLong) {
+ return IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+ }
return false;
}
-// Detect a zero extension from the given type. Returns the promoted operand on success.
+// Detect a zero extension in instruction from the given type. The to64 parameter
+// denotes if result is long, and thus zero extension from int can be included.
+// Returns the promoted operand on success.
static bool IsZeroExtensionAndGet(HInstruction* instruction,
Primitive::Type type,
- /*out*/ HInstruction** operand) {
+ /*out*/ HInstruction** operand,
+ bool to64 = false) {
// Accept any already wider constant that would be handled properly by zero
// extension when represented in the *width* of the given narrower data type
- // (the fact that byte/short normally sign extend does not matter here).
+ // (the fact that byte/short/int normally sign extend does not matter here).
int64_t value = 0;
if (IsInt64AndGet(instruction, /*out*/ &value)) {
switch (type) {
case Primitive::kPrimByte:
- if (std::numeric_limits<uint8_t>::min() <= value &&
- std::numeric_limits<uint8_t>::max() >= value) {
+ if (IsUint<8>(value)) {
*operand = instruction;
return true;
}
return false;
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- if (std::numeric_limits<uint16_t>::min() <= value &&
- std::numeric_limits<uint16_t>::max() <= value) {
+ if (IsUint<16>(value)) {
*operand = instruction;
return true;
}
return false;
+ case Primitive::kPrimInt:
+ if (IsUint<32>(value)) {
+ *operand = instruction;
+ return to64;
+ }
+ return false;
default:
return false;
}
@@ -170,14 +189,21 @@
(IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) ||
IsZeroExtensionAndGet(a, type, /*out*/ operand)))) {
switch ((*operand)->GetType()) {
- case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max();
+ case Primitive::kPrimByte:
+ return mask == std::numeric_limits<uint8_t>::max();
case Primitive::kPrimChar:
- case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max();
+ case Primitive::kPrimShort:
+ return mask == std::numeric_limits<uint16_t>::max();
+ case Primitive::kPrimInt:
+ return mask == std::numeric_limits<uint32_t>::max() && to64;
default: return false;
}
}
}
- // TODO: perhaps explicit conversions later too?
+ // Explicit type conversion to long.
+ if (instruction->IsTypeConversion() && instruction->GetType() == Primitive::kPrimLong) {
+ return IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand, /*to64*/ true);
+ }
return false;
}
@@ -214,6 +240,55 @@
return false;
}
+// Compute relative vector length based on type difference.
+static size_t GetOtherVL(Primitive::Type other_type, Primitive::Type vector_type, size_t vl) {
+ switch (other_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl;
+ default: break;
+ }
+ return vl;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl >> 1;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return vl;
+ default: break;
+ }
+ break;
+ case Primitive::kPrimInt:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl >> 2;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return vl >> 1;
+ case Primitive::kPrimInt: return vl;
+ default: break;
+ }
+ break;
+ case Primitive::kPrimLong:
+ switch (vector_type) {
+ case Primitive::kPrimBoolean:
+ case Primitive::kPrimByte: return vl >> 3;
+ case Primitive::kPrimChar:
+ case Primitive::kPrimShort: return vl >> 2;
+ case Primitive::kPrimInt: return vl >> 1;
+ case Primitive::kPrimLong: return vl;
+ default: break;
+ }
+ break;
+ default:
+ break;
+ }
+ LOG(FATAL) << "Unsupported idiom conversion";
+ UNREACHABLE();
+}
+
// Detect up to two instructions a and b, and an acccumulated constant c.
static bool IsAddConstHelper(HInstruction* instruction,
/*out*/ HInstruction** a,
@@ -260,16 +335,16 @@
}
// Detect reductions of the following forms,
-// under assumption phi has only *one* use:
// x = x_phi + ..
// x = x_phi - ..
// x = max(x_phi, ..)
// x = min(x_phi, ..)
static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
if (reduction->IsAdd()) {
- return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+ return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
+ (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
} else if (reduction->IsSub()) {
- return reduction->InputAt(0) == phi;
+ return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi);
} else if (reduction->IsInvokeStaticOrDirect()) {
switch (reduction->AsInvokeStaticOrDirect()->GetIntrinsic()) {
case Intrinsics::kMathMinIntInt:
@@ -280,7 +355,8 @@
case Intrinsics::kMathMaxLongLong:
case Intrinsics::kMathMaxFloatFloat:
case Intrinsics::kMathMaxDoubleDouble:
- return reduction->InputAt(0) == phi || reduction->InputAt(1) == phi;
+ return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) ||
+ (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi);
default:
return false;
}
@@ -288,9 +364,9 @@
return false;
}
-// Translates operation to reduction kind.
-static HVecReduce::ReductionKind GetReductionKind(HInstruction* reduction) {
- if (reduction->IsVecAdd() || reduction->IsVecSub()) {
+// Translates vector operation to reduction kind.
+static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
+ if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) {
return HVecReduce::kSum;
} else if (reduction->IsVecMin()) {
return HVecReduce::kMin;
@@ -720,7 +796,6 @@
HBasicBlock* block,
HBasicBlock* exit,
int64_t trip_count) {
- Primitive::Type induc_type = Primitive::kPrimInt;
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -739,6 +814,10 @@
vector_header_ = header;
vector_body_ = block;
+ // Loop induction type.
+ Primitive::Type induc_type = main_phi->GetType();
+ DCHECK(induc_type == Primitive::kPrimInt || induc_type == Primitive::kPrimLong) << induc_type;
+
// Generate dynamic loop peeling trip count, if needed, under the assumption
// that the Android runtime guarantees at least "component size" alignment:
// ptc = (ALIGN - (&a[initial] % ALIGN)) / type-size
@@ -767,10 +846,10 @@
HInstruction* rem = Insert(
preheader, new (global_allocator_) HAnd(induc_type,
diff,
- graph_->GetIntConstant(chunk - 1)));
+ graph_->GetConstant(induc_type, chunk - 1)));
vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
}
- vector_index_ = graph_->GetIntConstant(0);
+ vector_index_ = graph_->GetConstant(induc_type, 0);
// Generate runtime disambiguation test:
// vtc = a != b ? vtc : 0;
@@ -779,7 +858,8 @@
preheader,
new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
vtc = Insert(preheader,
- new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
+ new (global_allocator_)
+ HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc));
needs_cleanup = true;
}
@@ -793,7 +873,7 @@
graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
vector_index_,
ptc,
- graph_->GetIntConstant(1),
+ graph_->GetConstant(induc_type, 1),
kNoUnrollingFactor);
}
@@ -806,7 +886,7 @@
graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
vector_index_,
vtc,
- graph_->GetIntConstant(vector_length_), // increment per unroll
+ graph_->GetConstant(induc_type, vector_length_), // increment per unroll
unroll);
HLoopInformation* vloop = vector_header_->GetLoopInformation();
@@ -820,14 +900,20 @@
graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
vector_index_,
stc,
- graph_->GetIntConstant(1),
+ graph_->GetConstant(induc_type, 1),
kNoUnrollingFactor);
}
// Link reductions to their final uses.
for (auto i = reductions_->begin(); i != reductions_->end(); ++i) {
if (i->first->IsPhi()) {
- i->first->ReplaceWith(ReduceAndExtractIfNeeded(i->second));
+ HInstruction* phi = i->first;
+ HInstruction* repl = ReduceAndExtractIfNeeded(i->second);
+ // Deal with regular uses.
+ for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
+ induction_range_.Replace(use.GetUser(), phi, repl); // update induction use
+ }
+ phi->ReplaceWith(repl);
}
}
@@ -853,7 +939,7 @@
HInstruction* step,
uint32_t unroll) {
DCHECK(unroll == 1 || vector_mode_ == kVector);
- Primitive::Type induc_type = Primitive::kPrimInt;
+ Primitive::Type induc_type = lo->GetType();
// Prepare new loop.
vector_preheader_ = new_preheader,
vector_header_ = vector_preheader_->GetSingleSuccessor();
@@ -942,8 +1028,10 @@
auto redit = reductions_->find(instruction);
if (redit != reductions_->end()) {
Primitive::Type type = instruction->GetType();
- if (TrySetVectorType(type, &restrictions) &&
- VectorizeUse(node, instruction, generate_code, type, restrictions)) {
+ // Recognize SAD idiom or direct reduction.
+ if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) ||
+ (TrySetVectorType(type, &restrictions) &&
+ VectorizeUse(node, instruction, generate_code, type, restrictions))) {
if (generate_code) {
HInstruction* new_red = vector_map_->Get(instruction);
vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second));
@@ -1029,14 +1117,20 @@
HInstruction* opa = conversion->InputAt(0);
Primitive::Type from = conversion->GetInputType();
Primitive::Type to = conversion->GetResultType();
- if ((to == Primitive::kPrimByte ||
- to == Primitive::kPrimChar ||
- to == Primitive::kPrimShort) && from == Primitive::kPrimInt) {
- // Accept a "narrowing" type conversion from a "wider" computation for
- // (1) conversion into final required type,
- // (2) vectorizable operand,
- // (3) "wider" operations cannot bring in higher order bits.
- if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) {
+ if (Primitive::IsIntegralType(from) && Primitive::IsIntegralType(to)) {
+ size_t size_vec = Primitive::ComponentSize(type);
+ size_t size_from = Primitive::ComponentSize(from);
+ size_t size_to = Primitive::ComponentSize(to);
+ // Accept an integral conversion
+ // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or
+ // (1b) widening from at least vector type, and
+ // (2) vectorizable operand.
+ if ((size_to < size_from &&
+ size_to == size_vec &&
+ VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) ||
+ (size_to >= size_from &&
+ size_from >= size_vec &&
+ VectorizeUse(node, opa, generate_code, type, restrictions))) {
if (generate_code) {
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through
@@ -1088,7 +1182,7 @@
return true;
}
} else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
- // Recognize vectorization idioms.
+ // Recognize halving add idiom.
if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) {
return true;
}
@@ -1181,7 +1275,8 @@
return false; // reject, unless all operands are same-extension narrower
}
// Accept MIN/MAX(x, y) for vectorizable operands.
- DCHECK(r != nullptr && s != nullptr);
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = opa;
s = opb;
@@ -1232,11 +1327,11 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
*restrictions |= kNoDiv;
@@ -1261,17 +1356,17 @@
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
*restrictions |=
- kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoReduction;
+ *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoSAD;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax;
+ *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax | kNoSAD;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
*restrictions |= kNoMinMax | kNoReduction; // minmax: -0.0 vs +0.0
@@ -1289,17 +1384,17 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
@@ -1317,17 +1412,17 @@
switch (type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(16);
case Primitive::kPrimChar:
case Primitive::kPrimShort:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoSAD;
return TrySetVectorLength(8);
case Primitive::kPrimInt:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(4);
case Primitive::kPrimLong:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoSAD;
return TrySetVectorLength(2);
case Primitive::kPrimFloat:
*restrictions |= kNoMinMax | kNoReduction; // min/max(x, NaN)
@@ -1371,8 +1466,16 @@
if (it != vector_permanent_map_->end()) {
vector = it->second; // reuse during unrolling
} else {
- vector = new (global_allocator_) HVecReplicateScalar(
- global_allocator_, org, type, vector_length_);
+ // Generates ReplicateScalar( (optional_type_conv) org ).
+ HInstruction* input = org;
+ Primitive::Type input_type = input->GetType();
+ if (type != input_type && (type == Primitive::kPrimLong ||
+ input_type == Primitive::kPrimLong)) {
+ input = Insert(vector_preheader_,
+ new (global_allocator_) HTypeConversion(type, input, kNoDexPc));
+ }
+ vector = new (global_allocator_)
+ HVecReplicateScalar(global_allocator_, input, type, vector_length_);
vector_permanent_map_->Put(org, Insert(vector_preheader_, vector));
}
vector_map_->Put(org, vector);
@@ -1465,10 +1568,15 @@
// Prepare the new initialization.
if (vector_mode_ == kVector) {
// Generate a [initial, 0, .., 0] vector.
- new_init = Insert(
- vector_preheader_,
- new (global_allocator_) HVecSetScalars(
- global_allocator_, &new_init, phi->GetType(), vector_length_, 1));
+ HVecOperation* red_vector = new_red->AsVecOperation();
+ size_t vector_length = red_vector->GetVectorLength();
+ Primitive::Type type = red_vector->GetPackedType();
+ new_init = Insert(vector_preheader_,
+ new (global_allocator_) HVecSetScalars(global_allocator_,
+ &new_init,
+ type,
+ vector_length,
+ 1));
} else {
new_init = ReduceAndExtractIfNeeded(new_init);
}
@@ -1484,18 +1592,20 @@
if (instruction->IsPhi()) {
HInstruction* input = instruction->InputAt(1);
if (input->IsVecOperation()) {
- Primitive::Type type = input->AsVecOperation()->GetPackedType();
+ HVecOperation* input_vector = input->AsVecOperation();
+ size_t vector_length = input_vector->GetVectorLength();
+ Primitive::Type type = input_vector->GetPackedType();
+ HVecReduce::ReductionKind kind = GetReductionKind(input_vector);
HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0];
// Generate a vector reduction and scalar extract
// x = REDUCE( [x_1, .., x_n] )
// y = x_1
// along the exit of the defining loop.
- HVecReduce::ReductionKind kind = GetReductionKind(input);
HInstruction* reduce = new (global_allocator_) HVecReduce(
- global_allocator_, instruction, type, vector_length_, kind);
+ global_allocator_, instruction, type, vector_length, kind);
exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction());
instruction = new (global_allocator_) HVecExtractScalar(
- global_allocator_, reduce, type, vector_length_, 0);
+ global_allocator_, reduce, type, vector_length, 0);
exit->InsertInstructionAfter(instruction, reduce);
}
}
@@ -1516,27 +1626,19 @@
HInstruction* opb,
Primitive::Type type,
bool is_unsigned) {
- if (vector_mode_ == kSequential) {
- // Non-converting scalar code follows implicit integral promotion.
- if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
- type == Primitive::kPrimByte ||
- type == Primitive::kPrimChar ||
- type == Primitive::kPrimShort)) {
- type = Primitive::kPrimInt;
- }
- }
HInstruction* vector = nullptr;
+ Primitive::Type org_type = org->GetType();
switch (org->GetKind()) {
case HInstruction::kNeg:
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_),
- new (global_allocator_) HNeg(type, opa));
+ new (global_allocator_) HNeg(org_type, opa));
case HInstruction::kNot:
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
- new (global_allocator_) HNot(type, opa));
+ new (global_allocator_) HNot(org_type, opa));
case HInstruction::kBooleanNot:
DCHECK(opb == nullptr);
GENERATE_VEC(
@@ -1546,47 +1648,47 @@
DCHECK(opb == nullptr);
GENERATE_VEC(
new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_),
- new (global_allocator_) HTypeConversion(type, opa, kNoDexPc));
+ new (global_allocator_) HTypeConversion(org_type, opa, kNoDexPc));
case HInstruction::kAdd:
GENERATE_VEC(
new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HAdd(type, opa, opb));
+ new (global_allocator_) HAdd(org_type, opa, opb));
case HInstruction::kSub:
GENERATE_VEC(
new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HSub(type, opa, opb));
+ new (global_allocator_) HSub(org_type, opa, opb));
case HInstruction::kMul:
GENERATE_VEC(
new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HMul(type, opa, opb));
+ new (global_allocator_) HMul(org_type, opa, opb));
case HInstruction::kDiv:
GENERATE_VEC(
new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HDiv(type, opa, opb, kNoDexPc));
+ new (global_allocator_) HDiv(org_type, opa, opb, kNoDexPc));
case HInstruction::kAnd:
GENERATE_VEC(
new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HAnd(type, opa, opb));
+ new (global_allocator_) HAnd(org_type, opa, opb));
case HInstruction::kOr:
GENERATE_VEC(
new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HOr(type, opa, opb));
+ new (global_allocator_) HOr(org_type, opa, opb));
case HInstruction::kXor:
GENERATE_VEC(
new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HXor(type, opa, opb));
+ new (global_allocator_) HXor(org_type, opa, opb));
case HInstruction::kShl:
GENERATE_VEC(
new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HShl(type, opa, opb));
+ new (global_allocator_) HShl(org_type, opa, opb));
case HInstruction::kShr:
GENERATE_VEC(
new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HShr(type, opa, opb));
+ new (global_allocator_) HShr(org_type, opa, opb));
case HInstruction::kUShr:
GENERATE_VEC(
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
- new (global_allocator_) HUShr(type, opa, opb));
+ new (global_allocator_) HUShr(org_type, opa, opb));
case HInstruction::kInvokeStaticOrDirect: {
HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
if (vector_mode_ == kVector) {
@@ -1667,8 +1769,8 @@
//
// Method recognizes the following idioms:
-// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
-// regular halving add (a + b) >> 1 for unsigned/signed operands a, b
+// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b
+// truncated halving add (a + b) >> 1 for unsigned/signed operands a, b
// Provided that the operands are promoted to a wider form to do the arithmetic and
// then cast back to narrower form, the idioms can be mapped into efficient SIMD
// implementation that operates directly in narrower form (plus one extra bit).
@@ -1712,7 +1814,8 @@
}
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr && s != nullptr);
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = instruction->InputAt(0);
s = instruction->InputAt(1);
@@ -1741,6 +1844,88 @@
return false;
}
+// Method recognizes the following idiom:
+// q += ABS(a - b) for signed operands a, b
+// Provided that the operands have the same type or are promoted to a wider form.
+// Since this may involve a vector length change, the idiom is handled by going directly
+// to a sad-accumulate node (rather than relying combining finer grained nodes later).
+// TODO: unsigned SAD too?
+bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type reduction_type,
+ uint64_t restrictions) {
+ // Filter integral "q += ABS(a - b);" reduction, where ABS and SUB
+ // are done in the same precision (either int or long).
+ if (!instruction->IsAdd() ||
+ (reduction_type != Primitive::kPrimInt && reduction_type != Primitive::kPrimLong)) {
+ return false;
+ }
+ HInstruction* q = instruction->InputAt(0);
+ HInstruction* v = instruction->InputAt(1);
+ HInstruction* a = nullptr;
+ HInstruction* b = nullptr;
+ if (v->IsInvokeStaticOrDirect() &&
+ (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
+ v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
+ HInstruction* x = v->InputAt(0);
+ if (x->IsSub() && x->GetType() == reduction_type) {
+ a = x->InputAt(0);
+ b = x->InputAt(1);
+ }
+ }
+ if (a == nullptr || b == nullptr) {
+ return false;
+ }
+ // Accept same-type or consistent sign extension for narrower-type on operands a and b.
+ // The same-type or narrower operands are called r (a or lower) and s (b or lower).
+ HInstruction* r = a;
+ HInstruction* s = b;
+ bool is_unsigned = false;
+ Primitive::Type sub_type = a->GetType();
+ if (a->IsTypeConversion()) {
+ sub_type = a->InputAt(0)->GetType();
+ } else if (b->IsTypeConversion()) {
+ sub_type = b->InputAt(0)->GetType();
+ }
+ if (reduction_type != sub_type &&
+ (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {
+ return false;
+ }
+ // Try same/narrower type and deal with vector restrictions.
+ if (!TrySetVectorType(sub_type, &restrictions) || HasVectorRestrictions(restrictions, kNoSAD)) {
+ return false;
+ }
+ // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand
+ // idiomatic operation. Sequential code uses the original scalar expressions.
+ DCHECK(r != nullptr);
+ DCHECK(s != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = s = v->InputAt(0);
+ }
+ if (VectorizeUse(node, q, generate_code, sub_type, restrictions) &&
+ VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
+ VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
+ global_allocator_,
+ vector_map_->Get(q),
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ reduction_type,
+ GetOtherVL(reduction_type, sub_type, vector_length_)));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
+ } else {
+ GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type);
+ GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+ }
+ }
+ return true;
+ }
+ return false;
+}
+
//
// Vectorization heuristics.
//
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index f347518..ae2ea76 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -75,6 +75,7 @@
kNoMinMax = 1 << 8, // no min/max
kNoStringCharAt = 1 << 9, // no StringCharAt
kNoReduction = 1 << 10, // no reduction
+ kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
};
/*
@@ -172,6 +173,11 @@
bool generate_code,
Primitive::Type type,
uint64_t restrictions);
+ bool VectorizeSADIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ Primitive::Type type,
+ uint64_t restrictions);
// Vectorization heuristics.
bool IsVectorizationProfitable(int64_t trip_count);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index a6d0da1..6bc5111 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1396,6 +1396,7 @@
M(VecUShr, VecBinaryOperation) \
M(VecSetScalars, VecOperation) \
M(VecMultiplyAccumulate, VecOperation) \
+ M(VecSADAccumulate, VecOperation) \
M(VecLoad, VecMemoryOperation) \
M(VecStore, VecMemoryOperation) \
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 886d75e..1488b70 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -461,8 +461,8 @@
};
// Performs halving add on every component in the two vectors, viz.
-// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
-// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
+// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
+// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ]
// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension).
class HVecHalvingAdd FINAL : public HVecBinaryOperation {
public:
@@ -810,12 +810,12 @@
//
// Assigns the given scalar elements to a vector,
-// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ] if n == m,
-// set( array(x1, .., xm) ) = [ x1, .. , xm, 0, .., 0 ] if m < n.
+// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m,
+// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n.
class HVecSetScalars FINAL : public HVecOperation {
public:
HVecSetScalars(ArenaAllocator* arena,
- HInstruction** scalars, // array
+ HInstruction* scalars[],
Primitive::Type packed_type,
size_t vector_length,
size_t number_of_scalars,
@@ -827,7 +827,7 @@
vector_length,
dex_pc) {
for (size_t i = 0; i < number_of_scalars; i++) {
- DCHECK(!scalars[i]->IsVecOperation());
+ DCHECK(!scalars[i]->IsVecOperation() || scalars[i]->IsVecExtractScalar());
SetRawInputAt(0, scalars[i]);
}
}
@@ -842,9 +842,8 @@
DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
};
-// Multiplies every component in the two vectors, adds the result vector to the accumulator vector.
-// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] =
-// [ acc1 + x1 * y1, .. , accn + xn * yn ].
+// Multiplies every component in the two vectors, adds the result vector to the accumulator vector,
+// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ].
class HVecMultiplyAccumulate FINAL : public HVecOperation {
public:
HVecMultiplyAccumulate(ArenaAllocator* arena,
@@ -866,15 +865,11 @@
DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
DCHECK(HasConsistentPackedTypes(mul_left, packed_type));
DCHECK(HasConsistentPackedTypes(mul_right, packed_type));
- SetRawInputAt(kInputAccumulatorIndex, accumulator);
- SetRawInputAt(kInputMulLeftIndex, mul_left);
- SetRawInputAt(kInputMulRightIndex, mul_right);
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, mul_left);
+ SetRawInputAt(2, mul_right);
}
- static constexpr int kInputAccumulatorIndex = 0;
- static constexpr int kInputMulLeftIndex = 1;
- static constexpr int kInputMulRightIndex = 2;
-
bool CanBeMoved() const OVERRIDE { return true; }
bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
@@ -894,6 +889,42 @@
DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate);
};
+// Takes the absolute difference of two vectors, and adds the results to
+// same-precision or wider-precision components in the accumulator,
+// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ] =
+// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ],
+// for m <= n and non-overlapping sums.
+class HVecSADAccumulate FINAL : public HVecOperation {
+ public:
+ HVecSADAccumulate(ArenaAllocator* arena,
+ HInstruction* accumulator,
+ HInstruction* sad_left,
+ HInstruction* sad_right,
+ Primitive::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc = kNoDexPc)
+ : HVecOperation(arena,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs */ 3,
+ vector_length,
+ dex_pc) {
+ DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
+ DCHECK(sad_left->IsVecOperation());
+ DCHECK(sad_right->IsVecOperation());
+ DCHECK_EQ(sad_left->AsVecOperation()->GetPackedType(),
+ sad_right->AsVecOperation()->GetPackedType());
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, sad_left);
+ SetRawInputAt(2, sad_right);
+ }
+
+ DECLARE_INSTRUCTION(VecSADAccumulate);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(HVecSADAccumulate);
+};
+
// Loads a vector from memory, viz. load(mem, 1)
// yield the vector [ mem(1), .. , mem(n) ].
class HVecLoad FINAL : public HVecMemoryOperation {
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8dd2762..7451196 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -77,6 +77,7 @@
#include "jit/jit_logger.h"
#include "jni/quick/jni_compiler.h"
#include "licm.h"
+#include "linker/linker_patch.h"
#include "load_store_analysis.h"
#include "load_store_elimination.h"
#include "loop_optimization.h"
@@ -833,13 +834,13 @@
RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer);
}
-static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
- ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
+static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
+ ArenaVector<linker::LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
codegen->EmitLinkerPatches(&linker_patches);
// Sort patches by literal offset. Required for .oat_patches encoding.
std::sort(linker_patches.begin(), linker_patches.end(),
- [](const LinkerPatch& lhs, const LinkerPatch& rhs) {
+ [](const linker::LinkerPatch& lhs, const linker::LinkerPatch& rhs) {
return lhs.LiteralOffset() < rhs.LiteralOffset();
});
@@ -851,7 +852,7 @@
CodeGenerator* codegen,
CompilerDriver* compiler_driver,
const DexFile::CodeItem* code_item) const {
- ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
+ ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
ArenaVector<uint8_t> method_info(arena->Adapter(kArenaAllocStackMaps));
size_t stack_map_size = 0;
@@ -876,7 +877,7 @@
ArrayRef<const uint8_t>(method_info),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
- ArrayRef<const LinkerPatch>(linker_patches));
+ ArrayRef<const linker::LinkerPatch>(linker_patches));
return compiled_method;
}
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 185303b..754a762 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -474,7 +474,9 @@
// For a SIMD operation, compute the number of needed spill slots.
// TODO: do through vector type?
HInstruction* definition = GetParent()->GetDefinedBy();
- if (definition != nullptr && definition->IsVecOperation()) {
+ if (definition != nullptr &&
+ definition->IsVecOperation() &&
+ !definition->IsVecExtractScalar()) {
return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize;
}
// Return number of needed spill slots based on type.
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 12954a4..227954e 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -114,6 +114,24 @@
fmt);
}
+ std::string Repeatww(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
+ return RepeatTemplatedRegisters<Reg, Reg>(f,
+ GetRegisters(),
+ GetRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUseTertiaryName>,
+ &AssemblerTest::GetRegName<RegisterView::kUseTertiaryName>,
+ fmt);
+ }
+
+ std::string Repeatbb(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
+ return RepeatTemplatedRegisters<Reg, Reg>(f,
+ GetRegisters(),
+ GetRegisters(),
+ &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+ &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+ fmt);
+ }
+
std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), const std::string& fmt) {
return RepeatTemplatedRegisters<Reg, Reg, Reg>(f,
GetRegisters(),
@@ -147,10 +165,18 @@
return RepeatRegisterImm<RegisterView::kUsePrimaryName>(f, imm_bytes, fmt);
}
- std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
+ std::string RepeatrI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
}
+ std::string RepeatwI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
+ return RepeatRegisterImm<RegisterView::kUseTertiaryName>(f, imm_bytes, fmt);
+ }
+
+ std::string RepeatbI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
+ return RepeatRegisterImm<RegisterView::kUseQuaternaryName>(f, imm_bytes, fmt);
+ }
+
template <typename Reg1, typename Reg2, typename ImmType>
std::string RepeatTemplatedRegistersImmBits(void (Ass::*f)(Reg1, Reg2, ImmType),
int imm_bits,
@@ -909,6 +935,63 @@
fmt);
}
+ // Repeats over secondary registers and addresses provided by fixture.
+ std::string RepeatrA(void (Ass::*f)(Reg, const Addr&), const std::string& fmt) {
+ return RepeatrA(f, GetAddresses(), fmt);
+ }
+
+ // Variant that takes explicit vector of addresss
+ // (to test restricted addressing modes set).
+ std::string RepeatrA(void (Ass::*f)(Reg, const Addr&),
+ const std::vector<Addr>& a,
+ const std::string& fmt) {
+ return RepeatTemplatedRegMem<Reg, Addr>(
+ f,
+ GetRegisters(),
+ a,
+ &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+ &AssemblerTest::GetAddrName,
+ fmt);
+ }
+
+ // Repeats over tertiary registers and addresses provided by fixture.
+ std::string RepeatwA(void (Ass::*f)(Reg, const Addr&), const std::string& fmt) {
+ return RepeatwA(f, GetAddresses(), fmt);
+ }
+
+ // Variant that takes explicit vector of addresss
+ // (to test restricted addressing modes set).
+ std::string RepeatwA(void (Ass::*f)(Reg, const Addr&),
+ const std::vector<Addr>& a,
+ const std::string& fmt) {
+ return RepeatTemplatedRegMem<Reg, Addr>(
+ f,
+ GetRegisters(),
+ a,
+ &AssemblerTest::GetRegName<RegisterView::kUseTertiaryName>,
+ &AssemblerTest::GetAddrName,
+ fmt);
+ }
+
+ // Repeats over quaternary registers and addresses provided by fixture.
+ std::string RepeatbA(void (Ass::*f)(Reg, const Addr&), const std::string& fmt) {
+ return RepeatbA(f, GetAddresses(), fmt);
+ }
+
+ // Variant that takes explicit vector of addresss
+ // (to test restricted addressing modes set).
+ std::string RepeatbA(void (Ass::*f)(Reg, const Addr&),
+ const std::vector<Addr>& a,
+ const std::string& fmt) {
+ return RepeatTemplatedRegMem<Reg, Addr>(
+ f,
+ GetRegisters(),
+ a,
+ &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+ &AssemblerTest::GetAddrName,
+ fmt);
+ }
+
// Repeats over fp-registers and addresses provided by fixture.
std::string RepeatFA(void (Ass::*f)(FPReg, const Addr&), const std::string& fmt) {
return RepeatFA(f, GetAddresses(), fmt);
@@ -947,6 +1030,63 @@
fmt);
}
+ // Repeats over addresses and secondary registers provided by fixture.
+ std::string RepeatAr(void (Ass::*f)(const Addr&, Reg), const std::string& fmt) {
+ return RepeatAr(f, GetAddresses(), fmt);
+ }
+
+ // Variant that takes explicit vector of addresss
+ // (to test restricted addressing modes set).
+ std::string RepeatAr(void (Ass::*f)(const Addr&, Reg),
+ const std::vector<Addr>& a,
+ const std::string& fmt) {
+ return RepeatTemplatedMemReg<Addr, Reg>(
+ f,
+ a,
+ GetRegisters(),
+ &AssemblerTest::GetAddrName,
+ &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
+ fmt);
+ }
+
+ // Repeats over addresses and tertiary registers provided by fixture.
+ std::string RepeatAw(void (Ass::*f)(const Addr&, Reg), const std::string& fmt) {
+ return RepeatAw(f, GetAddresses(), fmt);
+ }
+
+ // Variant that takes explicit vector of addresss
+ // (to test restricted addressing modes set).
+ std::string RepeatAw(void (Ass::*f)(const Addr&, Reg),
+ const std::vector<Addr>& a,
+ const std::string& fmt) {
+ return RepeatTemplatedMemReg<Addr, Reg>(
+ f,
+ a,
+ GetRegisters(),
+ &AssemblerTest::GetAddrName,
+ &AssemblerTest::GetRegName<RegisterView::kUseTertiaryName>,
+ fmt);
+ }
+
+ // Repeats over addresses and quaternary registers provided by fixture.
+ std::string RepeatAb(void (Ass::*f)(const Addr&, Reg), const std::string& fmt) {
+ return RepeatAb(f, GetAddresses(), fmt);
+ }
+
+ // Variant that takes explicit vector of addresss
+ // (to test restricted addressing modes set).
+ std::string RepeatAb(void (Ass::*f)(const Addr&, Reg),
+ const std::vector<Addr>& a,
+ const std::string& fmt) {
+ return RepeatTemplatedMemReg<Addr, Reg>(
+ f,
+ a,
+ GetRegisters(),
+ &AssemblerTest::GetAddrName,
+ &AssemblerTest::GetRegName<RegisterView::kUseQuaternaryName>,
+ fmt);
+ }
+
// Repeats over addresses and fp-registers provided by fixture.
std::string RepeatAF(void (Ass::*f)(const Addr&, FPReg), const std::string& fmt) {
return RepeatAF(f, GetAddresses(), fmt);
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 3162a32..9fcede5 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -35,25 +35,25 @@
std::ostream& operator<<(std::ostream& os, const Address& addr) {
switch (addr.mod()) {
case 0:
- if (addr.rm() == ESP && addr.index() != ESP) {
- return os << "(%" << addr.base() << ",%"
- << addr.index() << "," << (1 << addr.scale()) << ")";
+ if (addr.rm() != ESP || addr.index() == ESP) {
+ return os << "(%" << addr.rm() << ")";
+ } else if (addr.base() == EBP) {
+ return os << static_cast<int>(addr.disp32()) << "(,%" << addr.index()
+ << "," << (1 << addr.scale()) << ")";
}
- return os << "(%" << addr.rm() << ")";
+ return os << "(%" << addr.base() << ",%" << addr.index() << "," << (1 << addr.scale()) << ")";
case 1:
- if (addr.rm() == ESP && addr.index() != ESP) {
- return os << static_cast<int>(addr.disp8())
- << "(%" << addr.base() << ",%"
- << addr.index() << "," << (1 << addr.scale()) << ")";
+ if (addr.rm() != ESP || addr.index() == ESP) {
+ return os << static_cast<int>(addr.disp8()) << "(%" << addr.rm() << ")";
}
- return os << static_cast<int>(addr.disp8()) << "(%" << addr.rm() << ")";
+ return os << static_cast<int>(addr.disp8()) << "(%" << addr.base() << ",%"
+ << addr.index() << "," << (1 << addr.scale()) << ")";
case 2:
- if (addr.rm() == ESP && addr.index() != ESP) {
- return os << static_cast<int>(addr.disp32())
- << "(%" << addr.base() << ",%"
- << addr.index() << "," << (1 << addr.scale()) << ")";
+ if (addr.rm() != ESP || addr.index() == ESP) {
+ return os << static_cast<int>(addr.disp32()) << "(%" << addr.rm() << ")";
}
- return os << static_cast<int>(addr.disp32()) << "(%" << addr.rm() << ")";
+ return os << static_cast<int>(addr.disp32()) << "(%" << addr.base() << ",%"
+ << addr.index() << "," << (1 << addr.scale()) << ")";
default:
return os << "<address?>";
}
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index c28ed3b..cccde37 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -148,21 +148,14 @@
};
//
-// Test repeat drivers used in the tests.
+// Test some repeat drivers used in the tests.
//
TEST_F(AssemblerX86Test, RepeatRR) {
- EXPECT_EQ("%eax %eax\n%eax %ebx\n%eax %ecx\n%eax %edx\n%eax %ebp\n%eax %esp\n%eax %esi\n"
- "%eax %edi\n%ebx %eax\n%ebx %ebx\n%ebx %ecx\n%ebx %edx\n%ebx %ebp\n%ebx %esp\n"
- "%ebx %esi\n%ebx %edi\n%ecx %eax\n%ecx %ebx\n%ecx %ecx\n%ecx %edx\n%ecx %ebp\n"
- "%ecx %esp\n%ecx %esi\n%ecx %edi\n%edx %eax\n%edx %ebx\n%edx %ecx\n%edx %edx\n"
- "%edx %ebp\n%edx %esp\n%edx %esi\n%edx %edi\n%ebp %eax\n%ebp %ebx\n%ebp %ecx\n"
- "%ebp %edx\n%ebp %ebp\n%ebp %esp\n%ebp %esi\n%ebp %edi\n%esp %eax\n%esp %ebx\n"
- "%esp %ecx\n%esp %edx\n%esp %ebp\n%esp %esp\n%esp %esi\n%esp %edi\n%esi %eax\n"
- "%esi %ebx\n%esi %ecx\n%esi %edx\n%esi %ebp\n%esi %esp\n%esi %esi\n%esi %edi\n"
- "%edi %eax\n%edi %ebx\n%edi %ecx\n%edi %edx\n%edi %ebp\n%edi %esp\n%edi %esi\n"
- "%edi %edi\n",
- RepeatRR(/*f*/ nullptr, "%{reg1} %{reg2}"));
+ EXPECT_NE(RepeatRR(/*f*/ nullptr, "%{reg1} %{reg2}")
+ .find("%eax %eax\n%eax %ebx\n%eax %ecx\n%eax %edx\n%eax %ebp\n%eax %esp\n%eax %esi\n"
+ "%eax %edi\n%ebx %eax\n%ebx %ebx\n%ebx %ecx\n%ebx %edx\n%ebx %ebp\n%ebx %esp\n"),
+ std::string::npos);
}
TEST_F(AssemblerX86Test, RepeatRI) {
@@ -173,18 +166,10 @@
}
TEST_F(AssemblerX86Test, RepeatFF) {
- EXPECT_EQ("%XMM0 %XMM0\n%XMM0 %XMM1\n%XMM0 %XMM2\n%XMM0 %XMM3\n%XMM0 %XMM4\n%XMM0 %XMM5\n"
- "%XMM0 %XMM6\n%XMM0 %XMM7\n%XMM1 %XMM0\n%XMM1 %XMM1\n%XMM1 %XMM2\n%XMM1 %XMM3\n"
- "%XMM1 %XMM4\n%XMM1 %XMM5\n%XMM1 %XMM6\n%XMM1 %XMM7\n%XMM2 %XMM0\n%XMM2 %XMM1\n"
- "%XMM2 %XMM2\n%XMM2 %XMM3\n%XMM2 %XMM4\n%XMM2 %XMM5\n%XMM2 %XMM6\n%XMM2 %XMM7\n"
- "%XMM3 %XMM0\n%XMM3 %XMM1\n%XMM3 %XMM2\n%XMM3 %XMM3\n%XMM3 %XMM4\n%XMM3 %XMM5\n"
- "%XMM3 %XMM6\n%XMM3 %XMM7\n%XMM4 %XMM0\n%XMM4 %XMM1\n%XMM4 %XMM2\n%XMM4 %XMM3\n"
- "%XMM4 %XMM4\n%XMM4 %XMM5\n%XMM4 %XMM6\n%XMM4 %XMM7\n%XMM5 %XMM0\n%XMM5 %XMM1\n"
- "%XMM5 %XMM2\n%XMM5 %XMM3\n%XMM5 %XMM4\n%XMM5 %XMM5\n%XMM5 %XMM6\n%XMM5 %XMM7\n"
- "%XMM6 %XMM0\n%XMM6 %XMM1\n%XMM6 %XMM2\n%XMM6 %XMM3\n%XMM6 %XMM4\n%XMM6 %XMM5\n"
- "%XMM6 %XMM6\n%XMM6 %XMM7\n%XMM7 %XMM0\n%XMM7 %XMM1\n%XMM7 %XMM2\n%XMM7 %XMM3\n"
- "%XMM7 %XMM4\n%XMM7 %XMM5\n%XMM7 %XMM6\n%XMM7 %XMM7\n",
- RepeatFF(/*f*/ nullptr, "%{reg1} %{reg2}"));
+ EXPECT_NE(RepeatFF(/*f*/ nullptr, "%{reg1} %{reg2}")
+ .find("%XMM0 %XMM0\n%XMM0 %XMM1\n%XMM0 %XMM2\n%XMM0 %XMM3\n%XMM0 %XMM4\n%XMM0 %XMM5\n"
+ "%XMM0 %XMM6\n%XMM0 %XMM7\n%XMM1 %XMM0\n%XMM1 %XMM1\n%XMM1 %XMM2\n%XMM1 %XMM3\n"),
+ std::string::npos);
}
TEST_F(AssemblerX86Test, RepeatFFI) {
@@ -235,6 +220,36 @@
// Actual x86 instruction assembler tests.
//
+TEST_F(AssemblerX86Test, PoplAllAddresses) {
+ // Make sure all addressing modes combinations are tested at least once.
+ std::vector<x86::Address> all_addresses;
+ for (x86::Register* base : GetRegisters()) {
+ // Base only.
+ all_addresses.push_back(x86::Address(*base, -1));
+ all_addresses.push_back(x86::Address(*base, 0));
+ all_addresses.push_back(x86::Address(*base, 1));
+ all_addresses.push_back(x86::Address(*base, 123456789));
+ for (x86::Register* index : GetRegisters()) {
+ if (*index == x86::ESP) {
+ // Index cannot be ESP.
+ continue;
+ } else if (*base == *index) {
+ // Index only.
+ all_addresses.push_back(x86::Address(*index, x86::TIMES_1, -1));
+ all_addresses.push_back(x86::Address(*index, x86::TIMES_2, 0));
+ all_addresses.push_back(x86::Address(*index, x86::TIMES_4, 1));
+ all_addresses.push_back(x86::Address(*index, x86::TIMES_8, 123456789));
+ }
+ // Base and index.
+ all_addresses.push_back(x86::Address(*base, *index, x86::TIMES_1, -1));
+ all_addresses.push_back(x86::Address(*base, *index, x86::TIMES_2, 0));
+ all_addresses.push_back(x86::Address(*base, *index, x86::TIMES_4, 1));
+ all_addresses.push_back(x86::Address(*base, *index, x86::TIMES_8, 123456789));
+ }
+ }
+ DriverStr(RepeatA(&x86::X86Assembler::popl, all_addresses, "popl {mem}"), "popq");
+}
+
TEST_F(AssemblerX86Test, Movl) {
DriverStr(RepeatRR(&x86::X86Assembler::movl, "movl %{reg2}, %{reg1}"), "movl");
}
@@ -370,7 +385,7 @@
}
TEST_F(AssemblerX86Test, RorlImm) {
- DriverStr(RepeatRI(&x86::X86Assembler::rorl, 1U, "rorl ${imm}, %{reg}"), "rorli");
+ DriverStr(RepeatRI(&x86::X86Assembler::rorl, /*imm_bytes*/ 1U, "rorl ${imm}, %{reg}"), "rorli");
}
// Roll only allows CL as the shift count.
@@ -390,7 +405,7 @@
}
TEST_F(AssemblerX86Test, RollImm) {
- DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
+ DriverStr(RepeatRI(&x86::X86Assembler::roll, /*imm_bytes*/ 1U, "roll ${imm}, %{reg}"), "rolli");
}
TEST_F(AssemblerX86Test, Cvtdq2ps) {
@@ -418,12 +433,12 @@
}
TEST_F(AssemblerX86Test, RoundSS) {
- DriverStr(RepeatFFI(&x86::X86Assembler::roundss, 1U,
+ DriverStr(RepeatFFI(&x86::X86Assembler::roundss, /*imm_bytes*/ 1U,
"roundss ${imm}, %{reg2}, %{reg1}"), "roundss");
}
TEST_F(AssemblerX86Test, RoundSD) {
- DriverStr(RepeatFFI(&x86::X86Assembler::roundsd, 1U,
+ DriverStr(RepeatFFI(&x86::X86Assembler::roundsd, /*imm_bytes*/ 1U,
"roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd");
}
@@ -896,7 +911,15 @@
}
TEST_F(AssemblerX86Test, Cmpb) {
- DriverStr(RepeatAI(&x86::X86Assembler::cmpb, /*imm_bytes*/ 1U, "cmpb ${imm}, {mem}"), "cmpb");
+ DriverStr(RepeatAI(&x86::X86Assembler::cmpb,
+ /*imm_bytes*/ 1U,
+ "cmpb ${imm}, {mem}"), "cmpb");
+}
+
+TEST_F(AssemblerX86Test, Cmpw) {
+ DriverStr(RepeatAI(&x86::X86Assembler::cmpw,
+ /*imm_bytes*/ 1U,
+ "cmpw ${imm}, {mem}"), "cmpw"); // TODO: only imm8?
}
} // namespace art
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 3bff67d..51f61ca 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -36,6 +36,34 @@
return os << "ST" << static_cast<int>(reg);
}
+std::ostream& operator<<(std::ostream& os, const Address& addr) {
+ switch (addr.mod()) {
+ case 0:
+ if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
+ return os << "(%" << addr.cpu_rm() << ")";
+ } else if (addr.base() == RBP) {
+ return os << static_cast<int>(addr.disp32()) << "(,%" << addr.cpu_index()
+ << "," << (1 << addr.scale()) << ")";
+ }
+ return os << "(%" << addr.cpu_base() << ",%"
+ << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
+ case 1:
+ if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
+ return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_rm() << ")";
+ }
+ return os << static_cast<int>(addr.disp8()) << "(%" << addr.cpu_base() << ",%"
+ << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
+ case 2:
+ if (addr.rm() != RSP || addr.cpu_index().AsRegister() == RSP) {
+ return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_rm() << ")";
+ }
+ return os << static_cast<int>(addr.disp32()) << "(%" << addr.cpu_base() << ",%"
+ << addr.cpu_index() << "," << (1 << addr.scale()) << ")";
+ default:
+ return os << "<address?>";
+ }
+}
+
void X86_64Assembler::call(CpuRegister reg) {
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitOptionalRex32(reg);
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index fc0839b5a8..1130444 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -80,6 +80,21 @@
return static_cast<Register>(encoding_at(1) & 7);
}
+ CpuRegister cpu_rm() const {
+ int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
+ return static_cast<CpuRegister>(rm() + ext);
+ }
+
+ CpuRegister cpu_index() const {
+ int ext = (rex_ & 2) != 0 ? x86_64::R8 : x86_64::RAX;
+ return static_cast<CpuRegister>(index() + ext);
+ }
+
+ CpuRegister cpu_base() const {
+ int ext = (rex_ & 1) != 0 ? x86_64::R8 : x86_64::RAX;
+ return static_cast<CpuRegister>(base() + ext);
+ }
+
uint8_t rex() const {
return rex_;
}
@@ -268,6 +283,7 @@
Address() {}
};
+std::ostream& operator<<(std::ostream& os, const Address& addr);
/**
* Class to handle constant area values.
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index 3e6110d..aff8871 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -153,6 +153,55 @@
}
void SetUpHelpers() OVERRIDE {
+ if (addresses_singleton_.size() == 0) {
+ // One addressing mode to test the repeat drivers.
+ addresses_singleton_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RAX),
+ x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_1, -1));
+ }
+
+ if (addresses_.size() == 0) {
+ // Several addressing modes.
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+ x86_64::CpuRegister(x86_64::RAX), x86_64::TIMES_1, 15));
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+ x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_2, 16));
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+ x86_64::CpuRegister(x86_64::RCX), x86_64::TIMES_4, 17));
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
+ x86_64::CpuRegister(x86_64::RDX), x86_64::TIMES_8, 18));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), -1));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RBX), 0));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RSI), 1));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 987654321));
+ // Several addressing modes with the special ESP.
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RSP),
+ x86_64::CpuRegister(x86_64::RAX), x86_64::TIMES_1, 15));
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RSP),
+ x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_2, 16));
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RSP),
+ x86_64::CpuRegister(x86_64::RCX), x86_64::TIMES_4, 17));
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::RSP),
+ x86_64::CpuRegister(x86_64::RDX), x86_64::TIMES_8, 18));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), -1));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 0));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 1));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 987654321));
+ // Several addressing modes with the higher registers.
+ addresses_.push_back(
+ x86_64::Address(x86_64::CpuRegister(x86_64::R8),
+ x86_64::CpuRegister(x86_64::R15), x86_64::TIMES_2, -1));
+ addresses_.push_back(x86_64::Address(x86_64::CpuRegister(x86_64::R15), 123456789));
+ }
+
if (registers_.size() == 0) {
registers_.push_back(new x86_64::CpuRegister(x86_64::RAX));
registers_.push_back(new x86_64::CpuRegister(x86_64::RBX));
@@ -248,8 +297,7 @@
}
std::vector<x86_64::Address> GetAddresses() {
- UNIMPLEMENTED(FATAL) << "Feature not implemented yet";
- UNREACHABLE();
+ return addresses_;
}
std::vector<x86_64::CpuRegister*> GetRegisters() OVERRIDE {
@@ -279,29 +327,31 @@
return quaternary_register_names_[reg];
}
+ std::vector<x86_64::Address> addresses_singleton_;
+
private:
+ std::vector<x86_64::Address> addresses_;
std::vector<x86_64::CpuRegister*> registers_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> secondary_register_names_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> tertiary_register_names_;
std::map<x86_64::CpuRegister, std::string, X86_64CpuRegisterCompare> quaternary_register_names_;
-
std::vector<x86_64::XmmRegister*> fp_registers_;
};
//
-// Test repeat drivers used in the tests.
+// Test some repeat drivers used in the tests.
//
TEST_F(AssemblerX86_64Test, RepeatI4) {
- EXPECT_EQ("%0\n%-1\n%18\n%4660\n%-4660\n%305419896\n%-305419896\n",
- RepeatI(/*f*/ nullptr, /*imm_bytes*/ 4U, "%{imm}"));
+ EXPECT_EQ("$0\n$-1\n$18\n$4660\n$-4660\n$305419896\n$-305419896\n",
+ RepeatI(/*f*/ nullptr, /*imm_bytes*/ 4U, "${imm}"));
}
TEST_F(AssemblerX86_64Test, RepeatI8) {
- EXPECT_EQ("%0\n%-1\n%18\n%4660\n%-4660\n%305419896\n%-305419896\n"
- "%20015998343868\n%-20015998343868\n%1311768467463790320\n"
- "%-1311768467463790320\n",
- RepeatI(/*f*/ nullptr, /*imm_bytes*/ 8U, "%{imm}"));
+ EXPECT_EQ("$0\n$-1\n$18\n$4660\n$-4660\n$305419896\n$-305419896\n"
+ "$20015998343868\n$-20015998343868\n$1311768467463790320\n"
+ "$-1311768467463790320\n",
+ RepeatI(/*f*/ nullptr, /*imm_bytes*/ 8U, "${imm}"));
}
TEST_F(AssemblerX86_64Test, Repeatr) {
@@ -310,10 +360,10 @@
Repeatr(/*f*/ nullptr, "%{reg}"));
}
-TEST_F(AssemblerX86_64Test, Repeatri) {
- EXPECT_NE(Repeatri(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} %{imm}").
- find("%eax %0\n%eax %-1\n%eax %18\n%ebx %0\n%ebx %-1\n%ebx %18\n"
- "%ecx %0\n%ecx %-1\n%ecx %18\n%edx %0\n%edx %-1\n%edx %18\n"),
+TEST_F(AssemblerX86_64Test, RepeatrI) {
+ EXPECT_NE(RepeatrI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} ${imm}").
+ find("%eax $0\n%eax $-1\n%eax $18\n%ebx $0\n%ebx $-1\n%ebx $18\n"
+ "%ecx $0\n%ecx $-1\n%ecx $18\n%edx $0\n%edx $-1\n%edx $18\n"),
std::string::npos);
}
@@ -334,10 +384,7 @@
TEST_F(AssemblerX86_64Test, RepeatrF) {
EXPECT_NE(RepeatrF(/*f*/ nullptr, "%{reg1} %{reg2}")
.find("%eax %xmm0\n%eax %xmm1\n%eax %xmm2\n%eax %xmm3\n"
- "%eax %xmm4\n%eax %xmm5\n%eax %xmm6\n%eax %xmm7\n"
- "%eax %xmm8\n%eax %xmm9\n%eax %xmm10\n%eax %xmm11\n"
- "%eax %xmm12\n%eax %xmm13\n%eax %xmm14\n%eax %xmm15\n"
- "%ebx %xmm0\n%ebx %xmm1\n%ebx %xmm2\n%ebx %xmm3\n%ebx %xmm4\n"),
+ "%eax %xmm4\n%eax %xmm5\n%eax %xmm6\n%eax %xmm7\n"),
std::string::npos);
}
@@ -348,59 +395,103 @@
}
TEST_F(AssemblerX86_64Test, RepeatRI) {
- EXPECT_EQ("%rax %0\n%rax %-1\n%rax %18\n%rbx %0\n%rbx %-1\n%rbx %18\n"
- "%rcx %0\n%rcx %-1\n%rcx %18\n%rdx %0\n%rdx %-1\n%rdx %18\n"
- "%rbp %0\n%rbp %-1\n%rbp %18\n%rsp %0\n%rsp %-1\n%rsp %18\n"
- "%rsi %0\n%rsi %-1\n%rsi %18\n%rdi %0\n%rdi %-1\n%rdi %18\n"
- "%r8 %0\n%r8 %-1\n%r8 %18\n%r9 %0\n%r9 %-1\n%r9 %18\n"
- "%r10 %0\n%r10 %-1\n%r10 %18\n%r11 %0\n%r11 %-1\n%r11 %18\n"
- "%r12 %0\n%r12 %-1\n%r12 %18\n%r13 %0\n%r13 %-1\n%r13 %18\n"
- "%r14 %0\n%r14 %-1\n%r14 %18\n%r15 %0\n%r15 %-1\n%r15 %18\n",
- RepeatRI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} %{imm}"));
+ EXPECT_NE(RepeatRI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg} ${imm}")
+ .find("%rax $0\n%rax $-1\n%rax $18\n%rbx $0\n%rbx $-1\n%rbx $18\n"
+ "%rcx $0\n%rcx $-1\n%rcx $18\n%rdx $0\n%rdx $-1\n%rdx $18\n"),
+ std::string::npos);
}
TEST_F(AssemblerX86_64Test, RepeatRr) {
EXPECT_NE(RepeatRr(/*f*/ nullptr, "%{reg1} %{reg2}")
.find("%rax %eax\n%rax %ebx\n%rax %ecx\n%rax %edx\n%rax %ebp\n"
- "%rax %esp\n%rax %esi\n%rax %edi\n%rax %r8d\n%rax %r9d\n"
- "%rax %r10d\n%rax %r11d\n%rax %r12d\n%rax %r13d\n%rax %r14d\n"
- "%rax %r15d\n%rbx %eax\n%rbx %ebx\n%rbx %ecx\n%rbx %edx\n"),
+ "%rax %esp\n%rax %esi\n%rax %edi\n%rax %r8d\n%rax %r9d\n"),
std::string::npos);
}
TEST_F(AssemblerX86_64Test, RepeatRR) {
EXPECT_NE(RepeatRR(/*f*/ nullptr, "%{reg1} %{reg2}")
.find("%rax %rax\n%rax %rbx\n%rax %rcx\n%rax %rdx\n%rax %rbp\n"
- "%rax %rsp\n%rax %rsi\n%rax %rdi\n%rax %r8\n%rax %r9\n"
- "%rax %r10\n%rax %r11\n%rax %r12\n%rax %r13\n%rax %r14\n"
- "%rax %r15\n%rbx %rax\n%rbx %rbx\n%rbx %rcx\n%rbx %rdx\n"),
+ "%rax %rsp\n%rax %rsi\n%rax %rdi\n%rax %r8\n%rax %r9\n"),
std::string::npos);
}
TEST_F(AssemblerX86_64Test, RepeatRF) {
EXPECT_NE(RepeatRF(/*f*/ nullptr, "%{reg1} %{reg2}")
.find("%rax %xmm0\n%rax %xmm1\n%rax %xmm2\n%rax %xmm3\n%rax %xmm4\n"
- "%rax %xmm5\n%rax %xmm6\n%rax %xmm7\n%rax %xmm8\n%rax %xmm9\n"
- "%rax %xmm10\n%rax %xmm11\n%rax %xmm12\n%rax %xmm13\n%rax %xmm14\n"
- "%rax %xmm15\n%rbx %xmm0\n%rbx %xmm1\n%rbx %xmm2\n%rbx %xmm3\n"),
+ "%rax %xmm5\n%rax %xmm6\n%rax %xmm7\n%rax %xmm8\n%rax %xmm9\n"),
std::string::npos);
}
TEST_F(AssemblerX86_64Test, RepeatFF) {
EXPECT_NE(RepeatFF(/*f*/ nullptr, "%{reg1} %{reg2}")
.find("%xmm0 %xmm0\n%xmm0 %xmm1\n%xmm0 %xmm2\n%xmm0 %xmm3\n%xmm0 %xmm4\n"
- "%xmm0 %xmm5\n%xmm0 %xmm6\n%xmm0 %xmm7\n%xmm0 %xmm8\n%xmm0 %xmm9\n"
- "%xmm0 %xmm10\n%xmm0 %xmm11\n%xmm0 %xmm12\n%xmm0 %xmm13\n%xmm0 %xmm14\n"
- "%xmm0 %xmm15\n%xmm1 %xmm0\n%xmm1 %xmm1\n%xmm1 %xmm2\n%xmm1 %xmm3\n"),
+ "%xmm0 %xmm5\n%xmm0 %xmm6\n%xmm0 %xmm7\n%xmm0 %xmm8\n%xmm0 %xmm9\n"),
std::string::npos);
}
TEST_F(AssemblerX86_64Test, RepeatFFI) {
- EXPECT_NE(RepeatFFI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg1} %{reg2} %{imm}")
- .find("%xmm0 %xmm0 %0\n%xmm0 %xmm0 %-1\n%xmm0 %xmm0 %18\n"
- "%xmm0 %xmm1 %0\n%xmm0 %xmm1 %-1\n%xmm0 %xmm1 %18\n"
- "%xmm0 %xmm2 %0\n%xmm0 %xmm2 %-1\n%xmm0 %xmm2 %18\n"
- "%xmm0 %xmm3 %0\n%xmm0 %xmm3 %-1\n%xmm0 %xmm3 %18\n"),
+ EXPECT_NE(RepeatFFI(/*f*/ nullptr, /*imm_bytes*/ 1U, "%{reg1} %{reg2} ${imm}")
+ .find("%xmm0 %xmm0 $0\n%xmm0 %xmm0 $-1\n%xmm0 %xmm0 $18\n"
+ "%xmm0 %xmm1 $0\n%xmm0 %xmm1 $-1\n%xmm0 %xmm1 $18\n"),
+ std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatA) {
+ EXPECT_EQ("-1(%rax,%rbx,1)\n", RepeatA(/*f*/ nullptr, addresses_singleton_, "{mem}"));
+}
+
+TEST_F(AssemblerX86_64Test, RepeatAFull) {
+ EXPECT_EQ("15(%rdi,%rax,1)\n16(%rdi,%rbx,2)\n17(%rdi,%rcx,4)\n18(%rdi,%rdx,8)\n"
+ "-1(%rax)\n(%rbx)\n1(%rsi)\n987654321(%rdi)\n15(%rsp,%rax,1)\n"
+ "16(%rsp,%rbx,2)\n17(%rsp,%rcx,4)\n18(%rsp,%rdx,8)\n-1(%rsp)\n"
+ "(%rsp)\n1(%rsp)\n987654321(%rsp)\n-1(%r8,%r15,2)\n123456789(%r15)\n",
+ RepeatA(/*f*/ nullptr, "{mem}"));
+}
+
+TEST_F(AssemblerX86_64Test, RepeatAI) {
+ EXPECT_EQ("-1(%rax,%rbx,1) $0\n-1(%rax,%rbx,1) $-1\n-1(%rax,%rbx,1) $18\n",
+ RepeatAI(/*f*/ nullptr, /*imm_bytes*/ 1U, addresses_singleton_, "{mem} ${imm}"));
+}
+
+TEST_F(AssemblerX86_64Test, RepeatRA) {
+ EXPECT_NE(RepeatRA(/*f*/ nullptr, addresses_singleton_, "%{reg} {mem}")
+ .find("%rax -1(%rax,%rbx,1)\n%rbx -1(%rax,%rbx,1)\n%rcx -1(%rax,%rbx,1)\n"
+ "%rdx -1(%rax,%rbx,1)\n%rbp -1(%rax,%rbx,1)\n%rsp -1(%rax,%rbx,1)\n"),
+ std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatrA) {
+ EXPECT_NE(RepeatrA(/*f*/ nullptr, addresses_singleton_, "%{reg} {mem}")
+ .find("%eax -1(%rax,%rbx,1)\n%ebx -1(%rax,%rbx,1)\n%ecx -1(%rax,%rbx,1)\n"
+ "%edx -1(%rax,%rbx,1)\n%ebp -1(%rax,%rbx,1)\n%esp -1(%rax,%rbx,1)\n"),
+ std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatAR) {
+ EXPECT_NE(RepeatAR(/*f*/ nullptr, addresses_singleton_, "{mem} %{reg}")
+ .find("-1(%rax,%rbx,1) %rax\n-1(%rax,%rbx,1) %rbx\n-1(%rax,%rbx,1) %rcx\n"
+ "-1(%rax,%rbx,1) %rdx\n-1(%rax,%rbx,1) %rbp\n-1(%rax,%rbx,1) %rsp\n"),
+ std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatAr) {
+ EXPECT_NE(RepeatAr(/*f*/ nullptr, addresses_singleton_, "{mem} %{reg}")
+ .find("-1(%rax,%rbx,1) %eax\n-1(%rax,%rbx,1) %ebx\n-1(%rax,%rbx,1) %ecx\n"
+ "-1(%rax,%rbx,1) %edx\n-1(%rax,%rbx,1) %ebp\n-1(%rax,%rbx,1) %esp\n"),
+ std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatFA) {
+ EXPECT_NE(RepeatFA(/*f*/ nullptr, addresses_singleton_, "%{reg} {mem}").
+ find("%xmm0 -1(%rax,%rbx,1)\n%xmm1 -1(%rax,%rbx,1)\n%xmm2 -1(%rax,%rbx,1)\n"
+ "%xmm3 -1(%rax,%rbx,1)\n%xmm4 -1(%rax,%rbx,1)\n%xmm5 -1(%rax,%rbx,1)\n"),
+ std::string::npos);
+}
+
+TEST_F(AssemblerX86_64Test, RepeatAF) {
+ EXPECT_NE(RepeatAF(/*f*/ nullptr, addresses_singleton_, "{mem} %{reg}")
+ .find("-1(%rax,%rbx,1) %xmm0\n-1(%rax,%rbx,1) %xmm1\n-1(%rax,%rbx,1) %xmm2\n"
+ "-1(%rax,%rbx,1) %xmm3\n-1(%rax,%rbx,1) %xmm4\n-1(%rax,%rbx,1) %xmm5\n"),
std::string::npos);
}
@@ -412,12 +503,43 @@
EXPECT_TRUE(CheckTools());
}
+TEST_F(AssemblerX86_64Test, PopqAllAddresses) {
+ // Make sure all addressing modes combinations are tested at least once.
+ std::vector<x86_64::Address> all_addresses;
+ for (x86_64::CpuRegister* base : GetRegisters()) {
+ // Base only.
+ all_addresses.push_back(x86_64::Address(*base, -1));
+ all_addresses.push_back(x86_64::Address(*base, 0));
+ all_addresses.push_back(x86_64::Address(*base, 1));
+ all_addresses.push_back(x86_64::Address(*base, 123456789));
+ for (x86_64::CpuRegister* index : GetRegisters()) {
+ if (index->AsRegister() == x86_64::RSP) {
+ // Index cannot be RSP.
+ continue;
+ } else if (base->AsRegister() == index->AsRegister()) {
+ // Index only.
+ all_addresses.push_back(x86_64::Address(*index, x86_64::TIMES_1, -1));
+ all_addresses.push_back(x86_64::Address(*index, x86_64::TIMES_2, 0));
+ all_addresses.push_back(x86_64::Address(*index, x86_64::TIMES_4, 1));
+ all_addresses.push_back(x86_64::Address(*index, x86_64::TIMES_8, 123456789));
+ }
+ // Base and index.
+ all_addresses.push_back(x86_64::Address(*base, *index, x86_64::TIMES_1, -1));
+ all_addresses.push_back(x86_64::Address(*base, *index, x86_64::TIMES_2, 0));
+ all_addresses.push_back(x86_64::Address(*base, *index, x86_64::TIMES_4, 1));
+ all_addresses.push_back(x86_64::Address(*base, *index, x86_64::TIMES_8, 123456789));
+ }
+ }
+ DriverStr(RepeatA(&x86_64::X86_64Assembler::popq, all_addresses, "popq {mem}"), "popq");
+}
+
TEST_F(AssemblerX86_64Test, PushqRegs) {
DriverStr(RepeatR(&x86_64::X86_64Assembler::pushq, "pushq %{reg}"), "pushq");
}
TEST_F(AssemblerX86_64Test, PushqImm) {
- DriverStr(RepeatI(&x86_64::X86_64Assembler::pushq, 4U, "pushq ${imm}"), "pushqi");
+ DriverStr(RepeatI(&x86_64::X86_64Assembler::pushq, /*imm_bytes*/ 4U,
+ "pushq ${imm}"), "pushqi");
}
TEST_F(AssemblerX86_64Test, MovqRegs) {
@@ -425,7 +547,8 @@
}
TEST_F(AssemblerX86_64Test, MovqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::movq, 8U, "movq ${imm}, %{reg}"), "movqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::movq, /*imm_bytes*/ 8U,
+ "movq ${imm}, %{reg}"), "movqi");
}
TEST_F(AssemblerX86_64Test, MovlRegs) {
@@ -433,7 +556,8 @@
}
TEST_F(AssemblerX86_64Test, MovlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::movl, 4U, "mov ${imm}, %{reg}"), "movli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::movl, /*imm_bytes*/ 4U,
+ "mov ${imm}, %{reg}"), "movli");
}
TEST_F(AssemblerX86_64Test, AddqRegs) {
@@ -441,7 +565,8 @@
}
TEST_F(AssemblerX86_64Test, AddqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::addq, 4U, "addq ${imm}, %{reg}"), "addqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::addq, /*imm_bytes*/ 4U,
+ "addq ${imm}, %{reg}"), "addqi");
}
TEST_F(AssemblerX86_64Test, AddlRegs) {
@@ -449,7 +574,8 @@
}
TEST_F(AssemblerX86_64Test, AddlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::addl, 4U, "add ${imm}, %{reg}"), "addli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::addl, /*imm_bytes*/ 4U,
+ "add ${imm}, %{reg}"), "addli");
}
TEST_F(AssemblerX86_64Test, ImulqReg1) {
@@ -461,7 +587,8 @@
}
TEST_F(AssemblerX86_64Test, ImulqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::imulq, 4U, "imulq ${imm}, %{reg}, %{reg}"),
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::imulq, /*imm_bytes*/ 4U,
+ "imulq ${imm}, %{reg}, %{reg}"),
"imulqi");
}
@@ -470,7 +597,8 @@
}
TEST_F(AssemblerX86_64Test, ImullImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::imull, 4U, "imull ${imm}, %{reg}, %{reg}"),
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::imull, /*imm_bytes*/ 4U,
+ "imull ${imm}, %{reg}, %{reg}"),
"imulli");
}
@@ -483,7 +611,8 @@
}
TEST_F(AssemblerX86_64Test, SubqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::subq, 4U, "subq ${imm}, %{reg}"), "subqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::subq, /*imm_bytes*/ 4U,
+ "subq ${imm}, %{reg}"), "subqi");
}
TEST_F(AssemblerX86_64Test, SublRegs) {
@@ -491,21 +620,19 @@
}
TEST_F(AssemblerX86_64Test, SublImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::subl, 4U, "sub ${imm}, %{reg}"), "subli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::subl, /*imm_bytes*/ 4U,
+ "sub ${imm}, %{reg}"), "subli");
}
// Shll only allows CL as the shift count.
std::string shll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->shll(*reg, shifter);
str << "shll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -514,21 +641,19 @@
}
TEST_F(AssemblerX86_64Test, ShllImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::shll, 1U, "shll ${imm}, %{reg}"), "shlli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::shll, /*imm_bytes*/ 1U,
+ "shll ${imm}, %{reg}"), "shlli");
}
// Shlq only allows CL as the shift count.
std::string shlq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->shlq(*reg, shifter);
str << "shlq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -537,21 +662,19 @@
}
TEST_F(AssemblerX86_64Test, ShlqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::shlq, 1U, "shlq ${imm}, %{reg}"), "shlqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::shlq, /*imm_bytes*/ 1U,
+ "shlq ${imm}, %{reg}"), "shlqi");
}
// Shrl only allows CL as the shift count.
std::string shrl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->shrl(*reg, shifter);
str << "shrl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -560,21 +683,18 @@
}
TEST_F(AssemblerX86_64Test, ShrlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::shrl, 1U, "shrl ${imm}, %{reg}"), "shrli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::shrl, /*imm_bytes*/ 1U, "shrl ${imm}, %{reg}"), "shrli");
}
// Shrq only allows CL as the shift count.
std::string shrq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->shrq(*reg, shifter);
str << "shrq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -583,21 +703,18 @@
}
TEST_F(AssemblerX86_64Test, ShrqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::shrq, 1U, "shrq ${imm}, %{reg}"), "shrqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::shrq, /*imm_bytes*/ 1U, "shrq ${imm}, %{reg}"), "shrqi");
}
// Sarl only allows CL as the shift count.
std::string sarl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->sarl(*reg, shifter);
str << "sarl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -606,21 +723,18 @@
}
TEST_F(AssemblerX86_64Test, SarlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::sarl, 1U, "sarl ${imm}, %{reg}"), "sarli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::sarl, /*imm_bytes*/ 1U, "sarl ${imm}, %{reg}"), "sarli");
}
// Sarq only allows CL as the shift count.
std::string sarq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->sarq(*reg, shifter);
str << "sarq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -629,21 +743,18 @@
}
TEST_F(AssemblerX86_64Test, SarqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::sarq, 1U, "sarq ${imm}, %{reg}"), "sarqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::sarq, /*imm_bytes*/ 1U, "sarq ${imm}, %{reg}"), "sarqi");
}
// Rorl only allows CL as the shift count.
std::string rorl_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->rorl(*reg, shifter);
str << "rorl %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -652,21 +763,18 @@
}
TEST_F(AssemblerX86_64Test, RorlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::rorl, 1U, "rorl ${imm}, %{reg}"), "rorli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::rorl, /*imm_bytes*/ 1U, "rorl ${imm}, %{reg}"), "rorli");
}
// Roll only allows CL as the shift count.
std::string roll_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->roll(*reg, shifter);
str << "roll %cl, %" << assembler_test->GetSecondaryRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -675,21 +783,18 @@
}
TEST_F(AssemblerX86_64Test, RollImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::roll, /*imm_bytes*/ 1U, "roll ${imm}, %{reg}"), "rolli");
}
// Rorq only allows CL as the shift count.
std::string rorq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->rorq(*reg, shifter);
str << "rorq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -698,21 +803,18 @@
}
TEST_F(AssemblerX86_64Test, RorqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::rorq, 1U, "rorq ${imm}, %{reg}"), "rorqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::rorq, /*imm_bytes*/ 1U, "rorq ${imm}, %{reg}"), "rorqi");
}
// Rolq only allows CL as the shift count.
std::string rolq_fn(AssemblerX86_64Test::Base* assembler_test, x86_64::X86_64Assembler* assembler) {
std::ostringstream str;
-
std::vector<x86_64::CpuRegister*> registers = assembler_test->GetRegisters();
-
x86_64::CpuRegister shifter(x86_64::RCX);
for (auto reg : registers) {
assembler->rolq(*reg, shifter);
str << "rolq %cl, %" << assembler_test->GetRegisterName(*reg) << "\n";
}
-
return str.str();
}
@@ -721,7 +823,7 @@
}
TEST_F(AssemblerX86_64Test, RolqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::rolq, 1U, "rolq ${imm}, %{reg}"), "rolqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::rolq, /*imm_bytes*/ 1U, "rolq ${imm}, %{reg}"), "rolqi");
}
TEST_F(AssemblerX86_64Test, CmpqRegs) {
@@ -729,8 +831,9 @@
}
TEST_F(AssemblerX86_64Test, CmpqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::cmpq, 4U /* cmpq only supports 32b imm */,
- "cmpq ${imm}, %{reg}"), "cmpqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::cmpq,
+ /*imm_bytes*/ 4U,
+ "cmpq ${imm}, %{reg}"), "cmpqi"); // only imm32
}
TEST_F(AssemblerX86_64Test, CmplRegs) {
@@ -738,7 +841,7 @@
}
TEST_F(AssemblerX86_64Test, CmplImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::cmpl, 4U, "cmpl ${imm}, %{reg}"), "cmpli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::cmpl, /*imm_bytes*/ 4U, "cmpl ${imm}, %{reg}"), "cmpli");
}
TEST_F(AssemblerX86_64Test, Testl) {
@@ -768,8 +871,9 @@
}
TEST_F(AssemblerX86_64Test, AndqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::andq, 4U /* andq only supports 32b imm */,
- "andq ${imm}, %{reg}"), "andqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::andq,
+ /*imm_bytes*/ 4U,
+ "andq ${imm}, %{reg}"), "andqi"); // only imm32
}
TEST_F(AssemblerX86_64Test, AndlRegs) {
@@ -777,7 +881,9 @@
}
TEST_F(AssemblerX86_64Test, AndlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::andl, 4U, "andl ${imm}, %{reg}"), "andli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::andl,
+ /*imm_bytes*/ 4U,
+ "andl ${imm}, %{reg}"), "andli");
}
TEST_F(AssemblerX86_64Test, OrqRegs) {
@@ -789,7 +895,8 @@
}
TEST_F(AssemblerX86_64Test, OrlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::orl, 4U, "orl ${imm}, %{reg}"), "orli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::orl,
+ /*imm_bytes*/ 4U, "orl ${imm}, %{reg}"), "orli");
}
TEST_F(AssemblerX86_64Test, XorqRegs) {
@@ -797,7 +904,8 @@
}
TEST_F(AssemblerX86_64Test, XorqImm) {
- DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq, 4U, "xorq ${imm}, %{reg}"), "xorqi");
+ DriverStr(RepeatRI(&x86_64::X86_64Assembler::xorq,
+ /*imm_bytes*/ 4U, "xorq ${imm}, %{reg}"), "xorqi");
}
TEST_F(AssemblerX86_64Test, XorlRegs) {
@@ -805,7 +913,8 @@
}
TEST_F(AssemblerX86_64Test, XorlImm) {
- DriverStr(Repeatri(&x86_64::X86_64Assembler::xorl, 4U, "xor ${imm}, %{reg}"), "xorli");
+ DriverStr(RepeatrI(&x86_64::X86_64Assembler::xorl,
+ /*imm_bytes*/ 4U, "xor ${imm}, %{reg}"), "xorli");
}
TEST_F(AssemblerX86_64Test, Xchgq) {
@@ -813,167 +922,87 @@
}
TEST_F(AssemblerX86_64Test, Xchgl) {
- // Test is disabled because GCC generates 0x87 0xC0 for xchgl eax, eax. All other cases are the
- // same. Anyone know why it doesn't emit a simple 0x90? It does so for xchgq rax, rax...
+ // TODO: Test is disabled because GCC generates 0x87 0xC0 for xchgl eax, eax. All other cases
+ // are the same. Anyone know why it doesn't emit a simple 0x90? It does so for xchgq rax, rax...
// DriverStr(Repeatrr(&x86_64::X86_64Assembler::xchgl, "xchgl %{reg2}, %{reg1}"), "xchgl");
}
TEST_F(AssemblerX86_64Test, LockCmpxchgl) {
- GetAssembler()->LockCmpxchgl(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
- x86_64::CpuRegister(x86_64::RSI));
- GetAssembler()->LockCmpxchgl(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
- x86_64::CpuRegister(x86_64::RSI));
- GetAssembler()->LockCmpxchgl(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
- x86_64::CpuRegister(x86_64::R8));
- GetAssembler()->LockCmpxchgl(x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
- GetAssembler()->LockCmpxchgl(x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
- x86_64::CpuRegister(x86_64::RSI));
- const char* expected =
- "lock cmpxchgl %ESI, 0xc(%RDI,%RBX,4)\n"
- "lock cmpxchgl %ESI, 0xc(%RDI,%R9,4)\n"
- "lock cmpxchgl %R8d, 0xc(%RDI,%R9,4)\n"
- "lock cmpxchgl %ESI, (%R13)\n"
- "lock cmpxchgl %ESI, (%R13,%R9,1)\n";
-
- DriverStr(expected, "lock_cmpxchgl");
+ DriverStr(RepeatAr(&x86_64::X86_64Assembler::LockCmpxchgl,
+ "lock cmpxchgl %{reg}, {mem}"), "lock_cmpxchgl");
}
TEST_F(AssemblerX86_64Test, LockCmpxchgq) {
- GetAssembler()->LockCmpxchgq(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12),
- x86_64::CpuRegister(x86_64::RSI));
- GetAssembler()->LockCmpxchgq(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
- x86_64::CpuRegister(x86_64::RSI));
- GetAssembler()->LockCmpxchgq(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12),
- x86_64::CpuRegister(x86_64::R8));
- GetAssembler()->LockCmpxchgq(x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RSI));
- GetAssembler()->LockCmpxchgq(x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0),
- x86_64::CpuRegister(x86_64::RSI));
- const char* expected =
- "lock cmpxchg %RSI, 0xc(%RDI,%RBX,4)\n"
- "lock cmpxchg %RSI, 0xc(%RDI,%R9,4)\n"
- "lock cmpxchg %R8, 0xc(%RDI,%R9,4)\n"
- "lock cmpxchg %RSI, (%R13)\n"
- "lock cmpxchg %RSI, (%R13,%R9,1)\n";
-
- DriverStr(expected, "lock_cmpxchg");
+ DriverStr(RepeatAR(&x86_64::X86_64Assembler::LockCmpxchgq,
+ "lock cmpxchg %{reg}, {mem}"), "lock_cmpxchg");
}
-TEST_F(AssemblerX86_64Test, Movl) {
- GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- GetAssembler()->movl(x86_64::CpuRegister(x86_64::R8), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), 0));
- GetAssembler()->movl(x86_64::CpuRegister(x86_64::RAX), x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0));
- const char* expected =
- "movl 0xc(%RDI,%RBX,4), %EAX\n"
- "movl 0xc(%RDI,%R9,4), %EAX\n"
- "movl 0xc(%RDI,%R9,4), %R8d\n"
- "movl (%R13), %EAX\n"
- "movl (%R13,%R9,1), %EAX\n";
-
- DriverStr(expected, "movl");
+TEST_F(AssemblerX86_64Test, MovqStore) {
+ DriverStr(RepeatAR(&x86_64::X86_64Assembler::movq, "movq %{reg}, {mem}"), "movq_s");
}
-TEST_F(AssemblerX86_64Test, Movw) {
- GetAssembler()->movw(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- x86_64::CpuRegister(x86_64::R9));
- GetAssembler()->movw(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- x86_64::Immediate(0));
- GetAssembler()->movw(x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0),
- x86_64::Immediate(0));
- GetAssembler()->movw(x86_64::Address(x86_64::CpuRegister(x86_64::R14), 0),
- x86_64::Immediate(0));
- const char* expected =
- "movw %R9w, 0(%RAX)\n"
- "movw $0, 0(%RAX)\n"
- "movw $0, 0(%R9)\n"
- "movw $0, 0(%R14)\n";
- DriverStr(expected, "movw");
+TEST_F(AssemblerX86_64Test, MovqLoad) {
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::movq, "movq {mem}, %{reg}"), "movq_l");
+}
+
+TEST_F(AssemblerX86_64Test, MovlStore) {
+ DriverStr(RepeatAr(&x86_64::X86_64Assembler::movl, "movl %{reg}, {mem}"), "movl_s");
+}
+
+TEST_F(AssemblerX86_64Test, MovlLoad) {
+ DriverStr(RepeatrA(&x86_64::X86_64Assembler::movl, "movl {mem}, %{reg}"), "movl_l");
+}
+
+TEST_F(AssemblerX86_64Test, MovwStore) {
+ DriverStr(RepeatAw(&x86_64::X86_64Assembler::movw, "movw %{reg}, {mem}"), "movw_s");
+}
+
+TEST_F(AssemblerX86_64Test, MovbStore) {
+ DriverStr(RepeatAb(&x86_64::X86_64Assembler::movb, "movb %{reg}, {mem}"), "movb_s");
}
TEST_F(AssemblerX86_64Test, Cmpw) {
- GetAssembler()->cmpw(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- x86_64::Immediate(0));
- GetAssembler()->cmpw(x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0),
- x86_64::Immediate(0));
- GetAssembler()->cmpw(x86_64::Address(x86_64::CpuRegister(x86_64::R14), 0),
- x86_64::Immediate(0));
- const char* expected =
- "cmpw $0, 0(%RAX)\n"
- "cmpw $0, 0(%R9)\n"
- "cmpw $0, 0(%R14)\n";
- DriverStr(expected, "cmpw");
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::cmpw,
+ /*imm_bytes*/ 1U,
+ "cmpw ${imm}, {mem}"), "cmpw"); // TODO: only imm8?
}
TEST_F(AssemblerX86_64Test, MovqAddrImm) {
- GetAssembler()->movq(x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- x86_64::Immediate(-5));
- const char* expected = "movq $-5, 0(%RAX)\n";
- DriverStr(expected, "movq");
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::movq,
+ /*imm_bytes*/ 4U,
+ "movq ${imm}, {mem}"), "movq"); // only imm32
+}
+
+TEST_F(AssemblerX86_64Test, MovlAddrImm) {
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::movl,
+ /*imm_bytes*/ 4U, "movl ${imm}, {mem}"), "movl");
+}
+
+TEST_F(AssemblerX86_64Test, MovwAddrImm) {
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::movw,
+ /*imm_bytes*/ 2U, "movw ${imm}, {mem}"), "movw");
+}
+
+TEST_F(AssemblerX86_64Test, MovbAddrImm) {
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::movb,
+ /*imm_bytes*/ 1U, "movb ${imm}, {mem}"), "movb");
}
TEST_F(AssemblerX86_64Test, Movntl) {
- GetAssembler()->movntl(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntl(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntl(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntl(x86_64::Address(x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntl(x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), x86_64::CpuRegister(x86_64::R9));
- const char* expected =
- "movntil %EAX, 0xc(%RDI,%RBX,4)\n"
- "movntil %EAX, 0xc(%RDI,%R9,4)\n"
- "movntil %EAX, 0xc(%RDI,%R9,4)\n"
- "movntil %EAX, (%R13)\n"
- "movntil %R9d, (%R13,%R9,1)\n";
-
- DriverStr(expected, "movntl");
+ DriverStr(RepeatAr(&x86_64::X86_64Assembler::movntl, "movntil %{reg}, {mem}"), "movntl");
}
TEST_F(AssemblerX86_64Test, Movntq) {
- GetAssembler()->movntq(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntq(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntq(x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntq(x86_64::Address(x86_64::CpuRegister(x86_64::R13), 0), x86_64::CpuRegister(x86_64::RAX));
- GetAssembler()->movntq(x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0), x86_64::CpuRegister(x86_64::R9));
- const char* expected =
- "movntiq %RAX, 0xc(%RDI,%RBX,4)\n"
- "movntiq %RAX, 0xc(%RDI,%R9,4)\n"
- "movntiq %RAX, 0xc(%RDI,%R9,4)\n"
- "movntiq %RAX, (%R13)\n"
- "movntiq %R9, (%R13,%R9,1)\n";
-
- DriverStr(expected, "movntq");
+ DriverStr(RepeatAR(&x86_64::X86_64Assembler::movntq, "movntiq %{reg}, {mem}"), "movntq");
}
TEST_F(AssemblerX86_64Test, Cvtsi2ssAddr) {
GetAssembler()->cvtsi2ss(x86_64::XmmRegister(x86_64::XMM0),
x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- false);
+ /*is64bit*/ false);
GetAssembler()->cvtsi2ss(x86_64::XmmRegister(x86_64::XMM0),
x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- true);
+ /*is64bit*/ true);
const char* expected = "cvtsi2ss 0(%RAX), %xmm0\n"
"cvtsi2ssq 0(%RAX), %xmm0\n";
DriverStr(expected, "cvtsi2ss");
@@ -982,111 +1011,69 @@
TEST_F(AssemblerX86_64Test, Cvtsi2sdAddr) {
GetAssembler()->cvtsi2sd(x86_64::XmmRegister(x86_64::XMM0),
x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- false);
+ /*is64bit*/ false);
GetAssembler()->cvtsi2sd(x86_64::XmmRegister(x86_64::XMM0),
x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0),
- true);
+ /*is64bit*/ true);
const char* expected = "cvtsi2sd 0(%RAX), %xmm0\n"
"cvtsi2sdq 0(%RAX), %xmm0\n";
DriverStr(expected, "cvtsi2sd");
}
TEST_F(AssemblerX86_64Test, CmpqAddr) {
- GetAssembler()->cmpq(x86_64::CpuRegister(x86_64::R12),
- x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
- const char* expected = "cmpq 0(%R9), %R12\n";
- DriverStr(expected, "cmpq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::cmpq, "cmpq {mem}, %{reg}"), "cmpq");
}
TEST_F(AssemblerX86_64Test, MovsxdAddr) {
- GetAssembler()->movsxd(x86_64::CpuRegister(x86_64::R12),
- x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
- const char* expected = "movslq 0(%R9), %R12\n";
- DriverStr(expected, "movsxd");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::movsxd, "movslq {mem}, %{reg}"), "movsxd");
}
TEST_F(AssemblerX86_64Test, TestqAddr) {
- GetAssembler()->testq(x86_64::CpuRegister(x86_64::R12),
- x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
- const char* expected = "testq 0(%R9), %R12\n";
- DriverStr(expected, "testq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::testq, "testq {mem}, %{reg}"), "testq");
}
TEST_F(AssemblerX86_64Test, AddqAddr) {
- GetAssembler()->addq(x86_64::CpuRegister(x86_64::R12),
- x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
- const char* expected = "addq 0(%R9), %R12\n";
- DriverStr(expected, "addq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::addq, "addq {mem}, %{reg}"), "addq");
}
TEST_F(AssemblerX86_64Test, SubqAddr) {
- GetAssembler()->subq(x86_64::CpuRegister(x86_64::R12),
- x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
- const char* expected = "subq 0(%R9), %R12\n";
- DriverStr(expected, "subq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::subq, "subq {mem}, %{reg}"), "subq");
}
TEST_F(AssemblerX86_64Test, Cvtss2sdAddr) {
- GetAssembler()->cvtss2sd(x86_64::XmmRegister(x86_64::XMM0),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "cvtss2sd 0(%RAX), %xmm0\n";
- DriverStr(expected, "cvtss2sd");
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::cvtss2sd, "cvtss2sd {mem}, %{reg}"), "cvtss2sd");
}
TEST_F(AssemblerX86_64Test, Cvtsd2ssAddr) {
- GetAssembler()->cvtsd2ss(x86_64::XmmRegister(x86_64::XMM0),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "cvtsd2ss 0(%RAX), %xmm0\n";
- DriverStr(expected, "cvtsd2ss");
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss {mem}, %{reg}"), "cvtsd2ss");
}
TEST_F(AssemblerX86_64Test, ComissAddr) {
- GetAssembler()->comiss(x86_64::XmmRegister(x86_64::XMM14),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "comiss 0(%RAX), %xmm14\n";
- DriverStr(expected, "comiss");
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::comiss, "comiss {mem}, %{reg}"), "comiss");
}
TEST_F(AssemblerX86_64Test, ComisdAddr) {
- GetAssembler()->comisd(x86_64::XmmRegister(x86_64::XMM0),
- x86_64::Address(x86_64::CpuRegister(x86_64::R9), 0));
- const char* expected = "comisd 0(%R9), %xmm0\n";
- DriverStr(expected, "comisd");
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::comisd, "comisd {mem}, %{reg}"), "comisd");
}
TEST_F(AssemblerX86_64Test, UComissAddr) {
- GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "ucomiss 0(%RAX), %xmm0\n";
- DriverStr(expected, "ucomiss");
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::ucomiss, "ucomiss {mem}, %{reg}"), "ucomiss");
}
TEST_F(AssemblerX86_64Test, UComisdAddr) {
- GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM0),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "ucomisd 0(%RAX), %xmm0\n";
- DriverStr(expected, "ucomisd");
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::ucomisd, "ucomisd {mem}, %{reg}"), "ucomisd");
}
TEST_F(AssemblerX86_64Test, Andq) {
- GetAssembler()->andq(x86_64::CpuRegister(x86_64::R9),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "andq 0(%RAX), %r9\n";
- DriverStr(expected, "andq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::andq, "andq {mem}, %{reg}"), "andq");
}
TEST_F(AssemblerX86_64Test, Orq) {
- GetAssembler()->orq(x86_64::CpuRegister(x86_64::R9),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "orq 0(%RAX), %r9\n";
- DriverStr(expected, "orq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::orq, "orq {mem}, %{reg}"), "orq");
}
TEST_F(AssemblerX86_64Test, Xorq) {
- GetAssembler()->xorq(x86_64::CpuRegister(x86_64::R9),
- x86_64::Address(x86_64::CpuRegister(x86_64::RAX), 0));
- const char* expected = "xorq 0(%RAX), %r9\n";
- DriverStr(expected, "xorq");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::xorq, "xorq {mem}, %{reg}"), "xorq");
}
TEST_F(AssemblerX86_64Test, RepneScasb) {
@@ -1115,22 +1102,20 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::movaps, "movaps %{reg2}, %{reg1}"), "movaps");
}
-TEST_F(AssemblerX86_64Test, MovapsAddr) {
- GetAssembler()->movaps(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->movaps(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1));
- const char* expected =
- "movaps 0x4(%RSP), %xmm0\n"
- "movaps %xmm1, 0x2(%RSP)\n";
- DriverStr(expected, "movaps_address");
+TEST_F(AssemblerX86_64Test, MovapsStore) {
+ DriverStr(RepeatAF(&x86_64::X86_64Assembler::movaps, "movaps %{reg}, {mem}"), "movaps_s");
}
-TEST_F(AssemblerX86_64Test, MovupsAddr) {
- GetAssembler()->movups(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->movups(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1));
- const char* expected =
- "movups 0x4(%RSP), %xmm0\n"
- "movups %xmm1, 0x2(%RSP)\n";
- DriverStr(expected, "movups_address");
+TEST_F(AssemblerX86_64Test, MovapsLoad) {
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::movaps, "movaps {mem}, %{reg}"), "movaps_l");
+}
+
+TEST_F(AssemblerX86_64Test, MovupsStore) {
+ DriverStr(RepeatAF(&x86_64::X86_64Assembler::movups, "movups %{reg}, {mem}"), "movups_s");
+}
+
+TEST_F(AssemblerX86_64Test, MovupsLoad) {
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::movups, "movups {mem}, %{reg}"), "movups_l");
}
TEST_F(AssemblerX86_64Test, Movss) {
@@ -1141,22 +1126,20 @@
DriverStr(RepeatFF(&x86_64::X86_64Assembler::movapd, "movapd %{reg2}, %{reg1}"), "movapd");
}
-TEST_F(AssemblerX86_64Test, MovapdAddr) {
- GetAssembler()->movapd(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->movapd(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1));
- const char* expected =
- "movapd 0x4(%RSP), %xmm0\n"
- "movapd %xmm1, 0x2(%RSP)\n";
- DriverStr(expected, "movapd_address");
+TEST_F(AssemblerX86_64Test, MovapdStore) {
+ DriverStr(RepeatAF(&x86_64::X86_64Assembler::movapd, "movapd %{reg}, {mem}"), "movapd_s");
}
-TEST_F(AssemblerX86_64Test, MovupdAddr) {
- GetAssembler()->movupd(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->movupd(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1));
- const char* expected =
- "movupd 0x4(%RSP), %xmm0\n"
- "movupd %xmm1, 0x2(%RSP)\n";
- DriverStr(expected, "movupd_address");
+TEST_F(AssemblerX86_64Test, MovapdLoad) {
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::movapd, "movapd {mem}, %{reg}"), "movapd_l");
+}
+
+TEST_F(AssemblerX86_64Test, MovupdStore) {
+ DriverStr(RepeatAF(&x86_64::X86_64Assembler::movupd, "movupd %{reg}, {mem}"), "movupd_s");
+}
+
+TEST_F(AssemblerX86_64Test, MovupdLoad) {
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::movupd, "movupd {mem}, %{reg}"), "movupd_l");
}
TEST_F(AssemblerX86_64Test, Movsd) {
@@ -1164,25 +1147,23 @@
}
TEST_F(AssemblerX86_64Test, Movdqa) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::movdqa, "movdqa %{reg2}, %{reg1}"), "movapd");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::movdqa, "movdqa %{reg2}, %{reg1}"), "movdqa");
}
-TEST_F(AssemblerX86_64Test, MovdqaAddr) {
- GetAssembler()->movdqa(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->movdqa(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1));
- const char* expected =
- "movdqa 0x4(%RSP), %xmm0\n"
- "movdqa %xmm1, 0x2(%RSP)\n";
- DriverStr(expected, "movdqa_address");
+TEST_F(AssemblerX86_64Test, MovdqaStore) {
+ DriverStr(RepeatAF(&x86_64::X86_64Assembler::movdqa, "movdqa %{reg}, {mem}"), "movdqa_s");
}
-TEST_F(AssemblerX86_64Test, MovdquAddr) {
- GetAssembler()->movdqu(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->movdqu(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 2), x86_64::XmmRegister(x86_64::XMM1));
- const char* expected =
- "movdqu 0x4(%RSP), %xmm0\n"
- "movdqu %xmm1, 0x2(%RSP)\n";
- DriverStr(expected, "movdqu_address");
+TEST_F(AssemblerX86_64Test, MovdqaLoad) {
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::movdqa, "movdqa {mem}, %{reg}"), "movdqa_l");
+}
+
+TEST_F(AssemblerX86_64Test, MovdquStore) {
+ DriverStr(RepeatAF(&x86_64::X86_64Assembler::movdqu, "movdqu %{reg}, {mem}"), "movdqu_s");
+}
+
+TEST_F(AssemblerX86_64Test, MovdquLoad) {
+ DriverStr(RepeatFA(&x86_64::X86_64Assembler::movdqu, "movdqu {mem}, %{reg}"), "movdqu_l");
}
TEST_F(AssemblerX86_64Test, Movd1) {
@@ -1364,11 +1345,13 @@
}
TEST_F(AssemblerX86_64Test, Roundss) {
- DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, 1, "roundss ${imm}, %{reg2}, %{reg1}"), "roundss");
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundss, /*imm_bytes*/ 1U,
+ "roundss ${imm}, %{reg2}, %{reg1}"), "roundss");
}
TEST_F(AssemblerX86_64Test, Roundsd) {
- DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, 1, "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd");
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::roundsd, /*imm_bytes*/ 1U,
+ "roundsd ${imm}, %{reg2}, %{reg1}"), "roundsd");
}
TEST_F(AssemblerX86_64Test, Xorps) {
@@ -1564,47 +1547,58 @@
}
TEST_F(AssemblerX86_64Test, Shufps) {
- DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, /*imm_bytes*/ 1U,
+ "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
}
TEST_F(AssemblerX86_64Test, Shufpd) {
- DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufpd, 1, "shufpd ${imm}, %{reg2}, %{reg1}"), "shufpd");
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufpd, /*imm_bytes*/ 1U,
+ "shufpd ${imm}, %{reg2}, %{reg1}"), "shufpd");
}
TEST_F(AssemblerX86_64Test, PShufd) {
- DriverStr(RepeatFFI(&x86_64::X86_64Assembler::pshufd, 1, "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
+ DriverStr(RepeatFFI(&x86_64::X86_64Assembler::pshufd, /*imm_bytes*/ 1U,
+ "pshufd ${imm}, %{reg2}, %{reg1}"), "pshufd");
}
TEST_F(AssemblerX86_64Test, Punpcklbw) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklbw, "punpcklbw %{reg2}, %{reg1}"), "punpcklbw");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklbw,
+ "punpcklbw %{reg2}, %{reg1}"), "punpcklbw");
}
TEST_F(AssemblerX86_64Test, Punpcklwd) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklwd, "punpcklwd %{reg2}, %{reg1}"), "punpcklwd");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklwd,
+ "punpcklwd %{reg2}, %{reg1}"), "punpcklwd");
}
TEST_F(AssemblerX86_64Test, Punpckldq) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckldq, "punpckldq %{reg2}, %{reg1}"), "punpckldq");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckldq,
+ "punpckldq %{reg2}, %{reg1}"), "punpckldq");
}
TEST_F(AssemblerX86_64Test, Punpcklqdq) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq, "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpcklqdq,
+ "punpcklqdq %{reg2}, %{reg1}"), "punpcklqdq");
}
TEST_F(AssemblerX86_64Test, Punpckhbw) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw, "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhbw,
+ "punpckhbw %{reg2}, %{reg1}"), "punpckhbw");
}
TEST_F(AssemblerX86_64Test, Punpckhwd) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd, "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhwd,
+ "punpckhwd %{reg2}, %{reg1}"), "punpckhwd");
}
TEST_F(AssemblerX86_64Test, Punpckhdq) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq, "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhdq,
+ "punpckhdq %{reg2}, %{reg1}"), "punpckhdq");
}
TEST_F(AssemblerX86_64Test, Punpckhqdq) {
- DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq, "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
+ DriverStr(RepeatFF(&x86_64::X86_64Assembler::punpckhqdq,
+ "punpckhqdq %{reg2}, %{reg1}"), "punpckhqdq");
}
TEST_F(AssemblerX86_64Test, Psllw) {
@@ -1653,63 +1647,21 @@
GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
GetAssembler()->psrld(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
DriverStr("psrld $1, %xmm0\n"
- "psrld $2, %xmm15\n", "pslldi");
+ "psrld $2, %xmm15\n", "psrldi");
}
TEST_F(AssemblerX86_64Test, Psrlq) {
GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
GetAssembler()->psrlq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
DriverStr("psrlq $1, %xmm0\n"
- "psrlq $2, %xmm15\n", "pslrqi");
+ "psrlq $2, %xmm15\n", "psrlqi");
}
TEST_F(AssemblerX86_64Test, Psrldq) {
GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM0), x86_64::Immediate(1));
GetAssembler()->psrldq(x86_64::XmmRegister(x86_64::XMM15), x86_64::Immediate(2));
DriverStr("psrldq $1, %xmm0\n"
- "psrldq $2, %xmm15\n", "pslrdqi");
-}
-
-TEST_F(AssemblerX86_64Test, UcomissAddress) {
- GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM1), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM2), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM3), x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), 0));
- GetAssembler()->ucomiss(x86_64::XmmRegister(x86_64::XMM4), x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0));
- const char* expected =
- "ucomiss 0xc(%RDI,%RBX,4), %xmm0\n"
- "ucomiss 0xc(%RDI,%R9,4), %xmm1\n"
- "ucomiss 0xc(%RDI,%R9,4), %xmm2\n"
- "ucomiss (%R13), %xmm3\n"
- "ucomiss (%R13,%R9,1), %xmm4\n";
-
- DriverStr(expected, "ucomiss_address");
-}
-
-TEST_F(AssemblerX86_64Test, UcomisdAddress) {
- GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM0), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM1), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM2), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM3), x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), 0));
- GetAssembler()->ucomisd(x86_64::XmmRegister(x86_64::XMM4), x86_64::Address(
- x86_64::CpuRegister(x86_64::R13), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_1, 0));
- const char* expected =
- "ucomisd 0xc(%RDI,%RBX,4), %xmm0\n"
- "ucomisd 0xc(%RDI,%R9,4), %xmm1\n"
- "ucomisd 0xc(%RDI,%R9,4), %xmm2\n"
- "ucomisd (%R13), %xmm3\n"
- "ucomisd (%R13,%R9,1), %xmm4\n";
-
- DriverStr(expected, "ucomisd_address");
+ "psrldq $2, %xmm15\n", "psrldqi");
}
std::string x87_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
@@ -1735,22 +1687,28 @@
DriverFn(&x87_fn, "x87");
}
-TEST_F(AssemblerX86_64Test, FPUIntegerLoad) {
- GetAssembler()->filds(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 4));
- GetAssembler()->fildl(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 12));
- const char* expected =
- "fildl 0x4(%RSP)\n"
- "fildll 0xc(%RSP)\n";
- DriverStr(expected, "FPUIntegerLoad");
+TEST_F(AssemblerX86_64Test, FPUIntegerLoads) {
+ DriverStr(RepeatA(&x86_64::X86_64Assembler::filds,
+ addresses_singleton_, // no ext addressing
+ "fildl {mem}"), "filds");
}
-TEST_F(AssemblerX86_64Test, FPUIntegerStore) {
- GetAssembler()->fistps(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 16));
- GetAssembler()->fistpl(x86_64::Address(x86_64::CpuRegister(x86_64::RSP), 24));
- const char* expected =
- "fistpl 0x10(%RSP)\n"
- "fistpll 0x18(%RSP)\n";
- DriverStr(expected, "FPUIntegerStore");
+TEST_F(AssemblerX86_64Test, FPUIntegerLoadl) {
+ DriverStr(RepeatA(&x86_64::X86_64Assembler::fildl,
+ addresses_singleton_, // no ext addressing
+ "fildll {mem}"), "fildl");
+}
+
+TEST_F(AssemblerX86_64Test, FPUIntegerStores) {
+ DriverStr(RepeatA(&x86_64::X86_64Assembler::fistps,
+ addresses_singleton_, // no ext addressing
+ "fistpl {mem}"), "fistps");
+}
+
+TEST_F(AssemblerX86_64Test, FPUIntegerStorel) {
+ DriverStr(RepeatA(&x86_64::X86_64Assembler::fistpl,
+ addresses_singleton_, // no ext addressing
+ "fistpll {mem}"), "fistpl");
}
TEST_F(AssemblerX86_64Test, Call) {
@@ -1762,13 +1720,15 @@
}
TEST_F(AssemblerX86_64Test, Enter) {
- DriverStr(RepeatI(&x86_64::X86_64Assembler::enter, 2U /* 16b immediate */, "enter ${imm}, $0",
- true /* Only non-negative number */), "enter");
+ DriverStr(RepeatI(&x86_64::X86_64Assembler::enter,
+ /*imm_bytes*/ 2U,
+ "enter ${imm}, $0", /*non-negative*/ true), "enter");
}
TEST_F(AssemblerX86_64Test, RetImm) {
- DriverStr(RepeatI(&x86_64::X86_64Assembler::ret, 2U /* 16b immediate */, "ret ${imm}",
- true /* Only non-negative number */), "reti");
+ DriverStr(RepeatI(&x86_64::X86_64Assembler::ret,
+ /*imm_bytes*/ 2U,
+ "ret ${imm}", /*non-negative*/ true), "ret");
}
std::string ret_and_leave_fn(AssemblerX86_64Test::Base* assembler_test ATTRIBUTE_UNUSED,
@@ -1801,18 +1761,7 @@
}
TEST_F(AssemblerX86_64Test, BsflAddress) {
- GetAssembler()->bsfl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsfl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsfl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- const char* expected =
- "bsfl 0xc(%RDI,%RBX,4), %R10d\n"
- "bsfl 0xc(%R10,%RBX,4), %edi\n"
- "bsfl 0xc(%RDI,%R9,4), %edi\n";
-
- DriverStr(expected, "bsfl_address");
+ DriverStr(RepeatrA(&x86_64::X86_64Assembler::bsfl, "bsfl {mem}, %{reg}"), "bsfl_address");
}
TEST_F(AssemblerX86_64Test, Bsfq) {
@@ -1820,18 +1769,7 @@
}
TEST_F(AssemblerX86_64Test, BsfqAddress) {
- GetAssembler()->bsfq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsfq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsfq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- const char* expected =
- "bsfq 0xc(%RDI,%RBX,4), %R10\n"
- "bsfq 0xc(%R10,%RBX,4), %RDI\n"
- "bsfq 0xc(%RDI,%R9,4), %RDI\n";
-
- DriverStr(expected, "bsfq_address");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::bsfq, "bsfq {mem}, %{reg}"), "bsfq_address");
}
TEST_F(AssemblerX86_64Test, Bsrl) {
@@ -1839,18 +1777,7 @@
}
TEST_F(AssemblerX86_64Test, BsrlAddress) {
- GetAssembler()->bsrl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsrl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsrl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- const char* expected =
- "bsrl 0xc(%RDI,%RBX,4), %R10d\n"
- "bsrl 0xc(%R10,%RBX,4), %edi\n"
- "bsrl 0xc(%RDI,%R9,4), %edi\n";
-
- DriverStr(expected, "bsrl_address");
+ DriverStr(RepeatrA(&x86_64::X86_64Assembler::bsrl, "bsrl {mem}, %{reg}"), "bsrl_address");
}
TEST_F(AssemblerX86_64Test, Bsrq) {
@@ -1858,18 +1785,7 @@
}
TEST_F(AssemblerX86_64Test, BsrqAddress) {
- GetAssembler()->bsrq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsrq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->bsrq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- const char* expected =
- "bsrq 0xc(%RDI,%RBX,4), %R10\n"
- "bsrq 0xc(%R10,%RBX,4), %RDI\n"
- "bsrq 0xc(%RDI,%R9,4), %RDI\n";
-
- DriverStr(expected, "bsrq_address");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::bsrq, "bsrq {mem}, %{reg}"), "bsrq_address");
}
TEST_F(AssemblerX86_64Test, Popcntl) {
@@ -1877,18 +1793,7 @@
}
TEST_F(AssemblerX86_64Test, PopcntlAddress) {
- GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->popcntl(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- const char* expected =
- "popcntl 0xc(%RDI,%RBX,4), %R10d\n"
- "popcntl 0xc(%R10,%RBX,4), %edi\n"
- "popcntl 0xc(%RDI,%R9,4), %edi\n";
-
- DriverStr(expected, "popcntl_address");
+ DriverStr(RepeatrA(&x86_64::X86_64Assembler::popcntl, "popcntl {mem}, %{reg}"), "popcntl_address");
}
TEST_F(AssemblerX86_64Test, Popcntq) {
@@ -1896,18 +1801,7 @@
}
TEST_F(AssemblerX86_64Test, PopcntqAddress) {
- GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::R10), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::R10), x86_64::CpuRegister(x86_64::RBX), x86_64::TIMES_4, 12));
- GetAssembler()->popcntq(x86_64::CpuRegister(x86_64::RDI), x86_64::Address(
- x86_64::CpuRegister(x86_64::RDI), x86_64::CpuRegister(x86_64::R9), x86_64::TIMES_4, 12));
- const char* expected =
- "popcntq 0xc(%RDI,%RBX,4), %R10\n"
- "popcntq 0xc(%R10,%RBX,4), %RDI\n"
- "popcntq 0xc(%RDI,%R9,4), %RDI\n";
-
- DriverStr(expected, "popcntq_address");
+ DriverStr(RepeatRA(&x86_64::X86_64Assembler::popcntq, "popcntq {mem}, %{reg}"), "popcntq_address");
}
TEST_F(AssemblerX86_64Test, CmovlAddress) {
@@ -1921,7 +1815,6 @@
"cmovzl 0xc(%RDI,%RBX,4), %R10d\n"
"cmovnzl 0xc(%R10,%RBX,4), %edi\n"
"cmovzl 0xc(%RDI,%R9,4), %edi\n";
-
DriverStr(expected, "cmovl_address");
}
@@ -1936,7 +1829,6 @@
"cmovzq 0xc(%RDI,%RBX,4), %R10\n"
"cmovnzq 0xc(%R10,%RBX,4), %rdi\n"
"cmovzq 0xc(%RDI,%R9,4), %rdi\n";
-
DriverStr(expected, "cmovq_address");
}
@@ -2050,52 +1942,21 @@
}
TEST_F(AssemblerX86_64Test, Cmpb) {
- GetAssembler()->cmpb(x86_64::Address(x86_64::CpuRegister(x86_64::RDI), 128),
- x86_64::Immediate(0));
- const char* expected = "cmpb $0, 128(%RDI)\n";
- DriverStr(expected, "cmpb");
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::cmpb,
+ /*imm_bytes*/ 1U,
+ "cmpb ${imm}, {mem}"), "cmpb");
}
TEST_F(AssemblerX86_64Test, TestbAddressImmediate) {
- GetAssembler()->testb(
- x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
- x86_64::CpuRegister(x86_64::RBX),
- x86_64::TIMES_4,
- 12),
- x86_64::Immediate(1));
- GetAssembler()->testb(
- x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
- x86_64::Immediate(-128));
- GetAssembler()->testb(
- x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
- x86_64::Immediate(127));
- const char* expected =
- "testb $1, 0xc(%RDI,%RBX,4)\n"
- "testb $-128, 0x7(%RSP)\n"
- "testb $127, 0x82(%RBX)\n";
-
- DriverStr(expected, "TestbAddressImmediate");
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::testb,
+ /*imm_bytes*/ 1U,
+ "testb ${imm}, {mem}"), "testbi");
}
TEST_F(AssemblerX86_64Test, TestlAddressImmediate) {
- GetAssembler()->testl(
- x86_64::Address(x86_64::CpuRegister(x86_64::RDI),
- x86_64::CpuRegister(x86_64::RBX),
- x86_64::TIMES_4,
- 12),
- x86_64::Immediate(1));
- GetAssembler()->testl(
- x86_64::Address(x86_64::CpuRegister(x86_64::RSP), FrameOffset(7)),
- x86_64::Immediate(-100000));
- GetAssembler()->testl(
- x86_64::Address(x86_64::CpuRegister(x86_64::RBX), MemberOffset(130)),
- x86_64::Immediate(77777777));
- const char* expected =
- "testl $1, 0xc(%RDI,%RBX,4)\n"
- "testl $-100000, 0x7(%RSP)\n"
- "testl $77777777, 0x82(%RBX)\n";
-
- DriverStr(expected, "TestlAddressImmediate");
+ DriverStr(RepeatAI(&x86_64::X86_64Assembler::testl,
+ /*imm_bytes*/ 4U,
+ "testl ${imm}, {mem}"), "testli");
}
class JNIMacroAssemblerX86_64Test : public JNIMacroAssemblerTest<x86_64::X86_64JNIMacroAssembler> {
@@ -2150,15 +2011,15 @@
// Construct assembly text counterpart.
std::ostringstream str;
- // 1) Push the spill_regs.
+ // (1) Push the spill_regs.
str << "pushq %rsi\n";
str << "pushq %r10\n";
- // 2) Move down the stack pointer.
+ // (2) Move down the stack pointer.
ssize_t displacement = static_cast<ssize_t>(frame_size) - (spill_regs.size() * 8 + 8);
str << "subq $" << displacement << ", %rsp\n";
- // 3) Store method reference.
+ // (3) Store method reference.
str << "movq %rdi, (%rsp)\n";
- // 4) Entry spills.
+ // (4) Entry spills.
str << "movq %rax, " << frame_size + 0 << "(%rsp)\n";
str << "movq %rbx, " << frame_size + 8 << "(%rsp)\n";
str << "movsd %xmm1, " << frame_size + 16 << "(%rsp)\n";
@@ -2186,10 +2047,10 @@
// Construct assembly text counterpart.
std::ostringstream str;
- // 1) Move up the stack pointer.
+ // (1) Move up the stack pointer.
ssize_t displacement = static_cast<ssize_t>(frame_size) - spill_regs.size() * 8 - 8;
str << "addq $" << displacement << ", %rsp\n";
- // 2) Pop spill regs.
+ // (2) Pop spill regs.
str << "popq %r10\n";
str << "popq %rsi\n";
str << "ret\n";
diff --git a/dex2oat/linker/image_test.h b/dex2oat/linker/image_test.h
index 71f1fa6..492c76b 100644
--- a/dex2oat/linker/image_test.h
+++ b/dex2oat/linker/image_test.h
@@ -302,8 +302,8 @@
}
for (size_t i = 0, size = oat_files.size(); i != size; ++i) {
- linker::MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
- driver->GetInstructionSetFeatures());
+ MultiOatRelativePatcher patcher(driver->GetInstructionSet(),
+ driver->GetInstructionSetFeatures());
OatWriter* const oat_writer = oat_writers[i].get();
ElfWriter* const elf_writer = elf_writers[i].get();
std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
diff --git a/dex2oat/linker/multi_oat_relative_patcher_test.cc b/dex2oat/linker/multi_oat_relative_patcher_test.cc
index 1b2d43e..ca9c5f1 100644
--- a/dex2oat/linker/multi_oat_relative_patcher_test.cc
+++ b/dex2oat/linker/multi_oat_relative_patcher_test.cc
@@ -19,6 +19,7 @@
#include "compiled_method.h"
#include "debug/method_debug_info.h"
#include "gtest/gtest.h"
+#include "linker/linker_patch.h"
#include "linker/vector_output_stream.h"
namespace art {
diff --git a/dex2oat/linker/oat_writer.cc b/dex2oat/linker/oat_writer.cc
index 51c2a03..305d4f6 100644
--- a/dex2oat/linker/oat_writer.cc
+++ b/dex2oat/linker/oat_writer.cc
@@ -29,7 +29,7 @@
#include "base/unix_file/fd_file.h"
#include "class_linker.h"
#include "class_table-inl.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "debug/method_debug_info.h"
#include "dex/verification_results.h"
#include "dex_file-inl.h"
@@ -43,6 +43,7 @@
#include "image_writer.h"
#include "linker/buffered_output_stream.h"
#include "linker/file_output_stream.h"
+#include "linker/linker_patch.h"
#include "linker/method_bss_mapping_encoder.h"
#include "linker/multi_oat_relative_patcher.h"
#include "linker/output_stream.h"
@@ -1063,7 +1064,7 @@
SafeMap<const CompiledMethod*, uint32_t, CodeOffsetsKeyComparator> dedupe_map_;
// Cache writer_'s members and compiler options.
- linker::MultiOatRelativePatcher* relative_patcher_;
+ MultiOatRelativePatcher* relative_patcher_;
uint32_t executable_offset_;
const bool debuggable_;
const bool native_debuggable_;
@@ -1920,7 +1921,7 @@
DCHECK_ALIGNED(offset, 4u);
oat_dex_files_[i].method_bss_mapping_offset_ = offset;
- linker::MethodBssMappingEncoder encoder(
+ MethodBssMappingEncoder encoder(
GetInstructionSetPointerSize(oat_header_->GetInstructionSet()));
size_t number_of_entries = 0u;
bool first_index = true;
@@ -2593,7 +2594,7 @@
"MethodBssMapping alignment check.");
DCHECK_ALIGNED(relative_offset, sizeof(uint32_t));
- linker::MethodBssMappingEncoder encoder(
+ MethodBssMappingEncoder encoder(
GetInstructionSetPointerSize(oat_header_->GetInstructionSet()));
// Allocate a sufficiently large MethodBssMapping.
size_t number_of_method_indexes = method_indexes.NumSetBits();
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 33d1491..d89d9f0 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -23,7 +23,7 @@
#include "base/unix_file/fd_file.h"
#include "class_linker.h"
#include "common_compiler_test.h"
-#include "compiled_method.h"
+#include "compiled_method-inl.h"
#include "compiler.h"
#include "debug/method_debug_info.h"
#include "dex/quick_compiler_callbacks.h"
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 36bd4bc..be78136 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -36,6 +36,7 @@
#include "base/unix_file/fd_file.h"
#include "class_linker-inl.h"
#include "class_linker.h"
+#include "compiled_method.h"
#include "debug/elf_debug_writer.h"
#include "debug/method_debug_info.h"
#include "dex_file-inl.h"
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 83a8e09..f3a0725 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -645,6 +645,30 @@
return true;
}
+class Dex2oatFileWrapper {
+ public:
+ explicit Dex2oatFileWrapper(File* file)
+ : file_(file),
+ unlink_file_at_destruction_(true) {
+ }
+
+ ~Dex2oatFileWrapper() {
+ if (unlink_file_at_destruction_ && (file_ != nullptr)) {
+ file_->Erase(/*unlink*/ true);
+ }
+ }
+
+ File* GetFile() { return file_.get(); }
+
+ void DisableUnlinkAtDestruction() {
+ unlink_file_at_destruction_ = false;
+ };
+
+ private:
+ std::unique_ptr<File> file_;
+ bool unlink_file_at_destruction_;
+};
+
OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks(
OatFileAssistant::OatFileInfo& info,
CompilerFilter::Filter filter,
@@ -690,8 +714,9 @@
(dex_path_stat.st_mode & S_IRGRP) |
(dex_path_stat.st_mode & S_IROTH);
- std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_file_name.c_str()));
- if (vdex_file.get() == nullptr) {
+ Dex2oatFileWrapper vdex_file_wrapper(OS::CreateEmptyFile(vdex_file_name.c_str()));
+ File* vdex_file = vdex_file_wrapper.GetFile();
+ if (vdex_file == nullptr) {
*error_msg = "Generation of oat file " + oat_file_name
+ " not attempted because the vdex file " + vdex_file_name
+ " could not be opened.";
@@ -705,8 +730,9 @@
return kUpdateNotAttempted;
}
- std::unique_ptr<File> oat_file(OS::CreateEmptyFile(oat_file_name.c_str()));
- if (oat_file.get() == nullptr) {
+ Dex2oatFileWrapper oat_file_wrapper(OS::CreateEmptyFile(oat_file_name.c_str()));
+ File* oat_file = oat_file_wrapper.GetFile();
+ if (oat_file == nullptr) {
*error_msg = "Generation of oat file " + oat_file_name
+ " not attempted because the oat file could not be created.";
return kUpdateNotAttempted;
@@ -715,7 +741,6 @@
if (fchmod(oat_file->Fd(), file_mode) != 0) {
*error_msg = "Generation of oat file " + oat_file_name
+ " not attempted because the oat file could not be made world readable.";
- oat_file->Erase();
return kUpdateNotAttempted;
}
@@ -731,29 +756,25 @@
args.push_back("--class-loader-context=" + dex2oat_context);
if (!Dex2Oat(args, error_msg)) {
- // Manually delete the oat and vdex files. This ensures there is no garbage
- // left over if the process unexpectedly died.
- vdex_file->Erase();
- unlink(vdex_file_name.c_str());
- oat_file->Erase();
- unlink(oat_file_name.c_str());
return kUpdateFailed;
}
if (vdex_file->FlushCloseOrErase() != 0) {
*error_msg = "Unable to close vdex file " + vdex_file_name;
- unlink(vdex_file_name.c_str());
return kUpdateFailed;
}
if (oat_file->FlushCloseOrErase() != 0) {
*error_msg = "Unable to close oat file " + oat_file_name;
- unlink(oat_file_name.c_str());
return kUpdateFailed;
}
// Mark that the odex file has changed and we should try to reload.
info.Reset();
+ // We have compiled successfully. Disable the auto-unlink.
+ vdex_file_wrapper.DisableUnlinkAtDestruction();
+ oat_file_wrapper.DisableUnlinkAtDestruction();
+
return kUpdateSucceeded;
}
diff --git a/test/442-checker-constant-folding/smali/TestCmp.smali b/test/442-checker-constant-folding/smali/TestCmp.smali
index df631bc..f55c837 100644
--- a/test/442-checker-constant-folding/smali/TestCmp.smali
+++ b/test/442-checker-constant-folding/smali/TestCmp.smali
@@ -330,3 +330,141 @@
cmpl-double v0, v1, v3
return v0
.end method
+
+
+## CHECK-START: int TestCmp.IntAddition2() constant_folding (before)
+## CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+## CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+## CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+## CHECK-DAG: <<Const6:i\d+>> IntConstant 6
+## CHECK-DAG: <<Add1:i\d+>> Add [<<Const1>>,<<Const2>>]
+## CHECK-DAG: <<Add2:i\d+>> Add [<<Const5>>,<<Const6>>]
+## CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>]
+## CHECK-DAG: Return [<<Add3>>]
+
+## CHECK-START: int TestCmp.IntAddition2() constant_folding (after)
+## CHECK-DAG: <<Const14:i\d+>> IntConstant 14
+## CHECK-DAG: Return [<<Const14>>]
+
+## CHECK-START: int TestCmp.IntAddition2() constant_folding (after)
+## CHECK-NOT: Add
+.method public static IntAddition2()I
+ # A more direct translation from Java.
+
+ # int a, b, c;
+ .registers 3
+
+ # a = 1;
+ const/4 v0, 1
+ # b = 2;
+ const/4 v1, 2
+
+ # a += b;
+ add-int/2addr v0, v1
+
+ # b = 5;
+ const/4 v1, 5
+ # c = 6;
+ const/4 v2, 6
+
+ # b += c;
+ add-int/2addr v1, v2
+ # c = a + b;
+ add-int v2, v0, v1
+
+ # return c;
+ return v2
+.end method
+
+
+## CHECK-START: int TestCmp.IntAddition2AddAndMove() constant_folding (before)
+## CHECK-DAG: <<Const1:i\d+>> IntConstant 1
+## CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+## CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+## CHECK-DAG: <<Const6:i\d+>> IntConstant 6
+## CHECK-DAG: <<Add1:i\d+>> Add [<<Const1>>,<<Const2>>]
+## CHECK-DAG: <<Add2:i\d+>> Add [<<Const5>>,<<Const6>>]
+## CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Add2>>]
+## CHECK-DAG: Return [<<Add3>>]
+
+## CHECK-START: int TestCmp.IntAddition2AddAndMove() constant_folding (after)
+## CHECK-DAG: <<Const14:i\d+>> IntConstant 14
+## CHECK-DAG: Return [<<Const14>>]
+
+## CHECK-START: int TestCmp.IntAddition2AddAndMove() constant_folding (after)
+## CHECK-NOT: Add
+
+# D8 uses 3 registers for += when local variable info is presented.
+.method public static IntAddition2AddAndMove()I
+ .registers 4
+
+ # a = 1;
+ const/4 v0, 1
+ # b = 2;
+ const/4 v1, 2
+
+ # a += b;
+ add-int v2, v0, v1
+ move v0, v2
+
+ # b = 5;
+ const/4 v2, 5
+ move v1, v2
+
+ # c = 6;
+ const/4 v2, 6
+
+ # b += c;
+ add-int v3, v1, v2
+ move v1, v3
+
+ # c = a + b;
+ add-int v3, v0, v1
+ move v2, v3
+
+ # return c;
+ return v2
+.end method
+
+
+## CHECK-START: int TestCmp.JumpsAndConditionals(boolean) constant_folding (before)
+## CHECK-DAG: <<Const2:i\d+>> IntConstant 2
+## CHECK-DAG: <<Const5:i\d+>> IntConstant 5
+## CHECK-DAG: <<Add:i\d+>> Add [<<Const5>>,<<Const2>>]
+## CHECK-DAG: <<Sub:i\d+>> Sub [<<Const5>>,<<Const2>>]
+## CHECK-DAG: <<Phi:i\d+>> Phi [<<Add>>,<<Sub>>]
+## CHECK-DAG: Return [<<Phi>>]
+
+## CHECK-START: int TestCmp.JumpsAndConditionals(boolean) constant_folding (after)
+## CHECK-DAG: <<Const3:i\d+>> IntConstant 3
+## CHECK-DAG: <<Const7:i\d+>> IntConstant 7
+## CHECK-DAG: <<Phi:i\d+>> Phi [<<Const7>>,<<Const3>>]
+## CHECK-DAG: Return [<<Phi>>]
+
+## CHECK-START: int TestCmp.JumpsAndConditionals(boolean) constant_folding (after)
+## CHECK-NOT: Add
+## CHECK-NOT: Sub
+.method public static JumpsAndConditionals(Z)I
+ # int a, b, c;
+ # a = 5;
+ # b = 2;
+ # if (cond)
+ # c = a + b;
+ # else
+ # c = a - b;
+ # return c;
+ .registers 4
+
+ const/4 v0, 5
+ const/4 v1, 2
+
+ if-eqz p0, :cond_7
+ add-int v2, v0, v1
+
+ :goto_6
+ return v2
+
+ :cond_7
+ sub-int v2, v0, v1
+ goto :goto_6
+.end method
diff --git a/test/442-checker-constant-folding/src/Main.java b/test/442-checker-constant-folding/src/Main.java
index eba5137..95c19ea 100644
--- a/test/442-checker-constant-folding/src/Main.java
+++ b/test/442-checker-constant-folding/src/Main.java
@@ -113,6 +113,19 @@
return (Integer)m.invoke(null);
}
+ public static int smaliIntAddition2() throws Exception {
+ Method m = Class.forName("TestCmp").getMethod("IntAddition2");
+ return (Integer)m.invoke(null);
+ }
+ public static int smaliIntAddition2AddAndMove() throws Exception {
+ Method m = Class.forName("TestCmp").getMethod("IntAddition2AddAndMove");
+ return (Integer)m.invoke(null);
+ }
+ public static int smaliJumpsAndConditionals(boolean cond) throws Exception {
+ Method m = Class.forName("TestCmp").getMethod("JumpsAndConditionals", boolean.class);
+ return (Integer)m.invoke(null, cond);
+ }
+
/**
* Exercise constant folding on negation.
@@ -225,11 +238,8 @@
/// CHECK-DAG: <<Const2:i\d+>> IntConstant 2
/// CHECK-DAG: <<Const5:i\d+>> IntConstant 5
/// CHECK-DAG: <<Const6:i\d+>> IntConstant 6
- /// CHECK-DAG: <<Const11:i\d+>> IntConstant 11
/// CHECK-DAG: <<Add1:i\d+>> Add [<<Const1>>,<<Const2>>]
/// CHECK-DAG: Add [<<Const5>>,<<Const6>>]
- /// CHECK-DAG: <<Add3:i\d+>> Add [<<Add1>>,<<Const11>>]
- /// CHECK-DAG: Return [<<Add3>>]
/// CHECK-START: int Main.IntAddition2() constant_folding (after)
/// CHECK-DAG: <<Const14:i\d+>> IntConstant 14
@@ -1520,6 +1530,8 @@
assertIntEquals(3, IntAddition1());
assertIntEquals(14, IntAddition2());
+ assertIntEquals(14, smaliIntAddition2());
+ assertIntEquals(14, smaliIntAddition2AddAndMove());
assertLongEquals(3L, LongAddition());
assertFloatEquals(3F, FloatAddition());
assertDoubleEquals(3D, DoubleAddition());
@@ -1567,6 +1579,8 @@
assertIntEquals(7, JumpsAndConditionals(true));
assertIntEquals(3, JumpsAndConditionals(false));
+ assertIntEquals(7, smaliJumpsAndConditionals(true));
+ assertIntEquals(3, smaliJumpsAndConditionals(false));
int arbitrary = 123456; // Value chosen arbitrarily.
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 1f1920c..55873ea 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -63,16 +63,12 @@
/// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
/// CHECK: InvokeStaticOrDirect method_load_kind:BssEntry
- /// CHECK-START-MIPS: int Main.testSimple(int) pc_relative_fixups_mips (after)
- /// CHECK: MipsComputeBaseMethodAddress
- /// CHECK-NOT: MipsComputeBaseMethodAddress
-
/// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
/// CHECK: X86ComputeBaseMethodAddress
/// CHECK-NOT: X86ComputeBaseMethodAddress
public static int testSimple(int x) {
- // This call should use PC-relative dex cache array load to retrieve the target method.
+ // This call should use PC-relative .bss array load to retrieve the target method.
return $noinline$foo(x);
}
@@ -104,14 +100,6 @@
/// CHECK: InvokeStaticOrDirect method_load_kind:BssEntry
/// CHECK: InvokeStaticOrDirect method_load_kind:BssEntry
- /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) pc_relative_fixups_mips (after)
- /// CHECK: MipsComputeBaseMethodAddress
- /// CHECK-NOT: MipsComputeBaseMethodAddress
-
- /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) pc_relative_fixups_mips (after)
- /// CHECK: MipsComputeBaseMethodAddress
- /// CHECK-NEXT: If
-
/// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
/// CHECK: X86ComputeBaseMethodAddress
/// CHECK-NOT: X86ComputeBaseMethodAddress
@@ -122,7 +110,7 @@
public static int testDiamond(boolean negate, int x) {
// These calls should use PC-relative loads to retrieve the target method.
- // PC-relative bases used by MIPS and X86 should be pulled before the If.
+ // PC-relative bases used by MIPS32R2 and X86 should be pulled before the If.
if (negate) {
return $noinline$foo(-x);
} else {
@@ -130,24 +118,6 @@
}
}
- /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (before)
- /// CHECK-NOT: MipsComputeBaseMethodAddress
-
- /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (after)
- /// CHECK: MipsComputeBaseMethodAddress
- /// CHECK-NOT: MipsComputeBaseMethodAddress
-
- /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (after)
- /// CHECK: InvokeStaticOrDirect
- /// CHECK-NOT: InvokeStaticOrDirect
-
- /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (after)
- /// CHECK: ArrayLength
- /// CHECK-NEXT: MipsComputeBaseMethodAddress
- /// CHECK-NEXT: Goto
- /// CHECK: begin_block
- /// CHECK: InvokeStaticOrDirect method_load_kind:BssEntry
-
/// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
/// CHECK-NOT: X86ComputeBaseMethodAddress
@@ -167,23 +137,13 @@
/// CHECK: InvokeStaticOrDirect method_load_kind:BssEntry
public static int testLoop(int[] array, int x) {
- // PC-relative bases used by MIPS and X86 should be pulled before the loop.
+ // PC-relative bases used by MIPS32R2 and X86 should be pulled before the loop.
for (int i : array) {
x += $noinline$foo(i);
}
return x;
}
- /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_mips (before)
- /// CHECK-NOT: MipsComputeBaseMethodAddress
-
- /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_mips (after)
- /// CHECK: If
- /// CHECK: begin_block
- /// CHECK: ArrayLength
- /// CHECK-NEXT: MipsComputeBaseMethodAddress
- /// CHECK-NEXT: Goto
-
/// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
/// CHECK-NOT: X86ComputeBaseMethodAddress
@@ -195,7 +155,7 @@
/// CHECK-NEXT: Goto
public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
- // PC-relative bases used by MIPS and X86 should be pulled before the loop
+ // PC-relative bases used by MIPS32R2 and X86 should be pulled before the loop
// but not outside the if.
if (array != null) {
for (int i : array) {
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index e018b56..9643b90 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -165,6 +165,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:b\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:b\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(byte[] x, byte[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (byte) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
// Initialize cross-values for all possible values.
int total = 256 * 256;
@@ -202,6 +224,11 @@
byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ byte expected = (byte) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 57cad9b..8a0262c 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -89,6 +89,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:c\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:c\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(char[], char[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:true loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(char[] x, char[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (char) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
char[] interesting = {
0x0000, 0x0001, 0x007f, 0x0080, 0x0081, 0x00ff,
@@ -124,6 +146,11 @@
char expected = (char) Math.max(y[i], z[i]);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ char expected = (char) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index 4f2a7a4..ffbf73b 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -165,6 +165,28 @@
}
}
+ /// CHECK-START: void Main.doitMin100(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:s\d+>> ArrayGet loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:i\d+>> InvokeStaticOrDirect [<<Get>>,<<I100>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:s\d+>> TypeConversion [<<Min>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.doitMin100(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<I100:i\d+>> IntConstant 100 loop:none
+ /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I100>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:d\d+>> VecLoad loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Min:d\d+>> VecMin [<<Get>>,<<Repl>>] unsigned:false loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>> outer_loop:none
+ private static void doitMin100(short[] x, short[] y) {
+ int min = Math.min(x.length, y.length);
+ for (int i = 0; i < min; i++) {
+ x[i] = (short) Math.min(y[i], 100);
+ }
+ }
+
public static void main(String[] args) {
short[] interesting = {
(short) 0x0000, (short) 0x0001, (short) 0x007f,
@@ -216,6 +238,11 @@
short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
expectEquals(expected, x[i]);
}
+ doitMin100(x, y);
+ for (int i = 0; i < total; i++) {
+ short expected = (short) Math.min(y[i], 100);
+ expectEquals(expected, x[i]);
+ }
System.out.println("passed");
}
diff --git a/test/656-checker-simd-opt/src/Main.java b/test/656-checker-simd-opt/src/Main.java
index 091633f..39a126f 100644
--- a/test/656-checker-simd-opt/src/Main.java
+++ b/test/656-checker-simd-opt/src/Main.java
@@ -92,7 +92,91 @@
}
}
- public static void main(String[] args) {
+ /// CHECK-START: long Main.longInductionReduction(long[]) loop_optimization (before)
+ /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,<<Add1:j\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<L1>>,<<Add2:j\d+>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add2>> Add [<<Phi2>>,<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add1>> Add [<<Phi1>>,<<L1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.longInductionReduction(long[]) loop_optimization (after)
+ /// CHECK-DAG: <<L0:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<L2:j\d+>> LongConstant 2 loop:none
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Get>>] loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<L1>>] loop:none
+ /// CHECK-DAG: <<Phi1:j\d+>> Phi [<<L0>>,{{j\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<L2>>] loop:<<Loop>> outer_loop:none
+ static long longInductionReduction(long[] y) {
+ long x = 1;
+ for (long i = 0; i < 10; i++) {
+ x += y[0];
+ }
+ return x;
+ }
+
+ /// CHECK-START: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,<<Add:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add>> Add [<<Phi>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.intVectorLongInvariant(int[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Get:j\d+>> ArrayGet [{{l\d+}},<<I0>>] loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<Get>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
+ static void intVectorLongInvariant(int[] x, long[] y) {
+ for (int i = 0; i < 100; i++) {
+ x[i] = (int) y[0];
+ }
+ }
+
+ /// CHECK-START: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,<<Add:i\d+>>] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<AddL:j\d+>> Add [<<Cnv1>>,<<L1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:i\d+>> TypeConversion [<<AddL>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: ArraySet [{{l\d+}},<<Phi>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add>> Add [<<Phi>>,<<I1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: void Main.longCanBeDoneWithInt(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<I0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<I4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<L1:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Cnv:i\d+>> TypeConversion [<<L1>>] loop:none
+ /// CHECK-DAG: <<Rep:d\d+>> VecReplicateScalar [<<Cnv>>] loop:none
+ /// CHECK-DAG: <<Phi:i\d+>> Phi [<<I0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Add:d\d+>> VecAdd [<<Load>>,<<Rep>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: VecStore [{{l\d+}},<<Phi>>,<<Add>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi>>,<<I4>>] loop:<<Loop>> outer_loop:none
+ static void longCanBeDoneWithInt(int[] x, int[] y) {
+ for (int i = 0; i < 100; i++) {
+ x[i] = (int) (y[i] + 1L);
+ }
+ }
+
+ static void testUnroll() {
float[] x = new float[100];
float[] y = new float[100];
for (int i = 0; i < 100; i++) {
@@ -104,51 +188,89 @@
expectEquals(5.0f, x[i]);
expectEquals(2.0f, y[i]);
}
- {
- int[] a = new int[100];
- int[] b = new int[100];
- for (int i = 0; i < 100; i++) {
- a[i] = 0;
- b[i] = i;
- }
- stencil(a, b, 100);
- for (int i = 1; i < 99; i++) {
- int e = i + i + i;
- expectEquals(e, a[i]);
- expectEquals(i, b[i]);
- }
+ }
+
+ static void testStencil1() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ for (int i = 0; i < 100; i++) {
+ a[i] = 0;
+ b[i] = i;
}
- {
- int[] a = new int[100];
- int[] b = new int[100];
- for (int i = 0; i < 100; i++) {
- a[i] = 0;
- b[i] = i;
- }
- stencilSubInt(a, b, 100);
- for (int i = 1; i < 99; i++) {
- int e = i + i + i;
- expectEquals(e, a[i]);
- expectEquals(i, b[i]);
- }
+ stencil(a, b, 100);
+ for (int i = 1; i < 99; i++) {
+ int e = i + i + i;
+ expectEquals(e, a[i]);
+ expectEquals(i, b[i]);
}
- {
- int[] a = new int[100];
- int[] b = new int[100];
- for (int i = 0; i < 100; i++) {
- a[i] = 0;
- b[i] = i;
- }
- stencilAddInt(a, b, 100);
- for (int i = 1; i < 99; i++) {
- int e = i + i + i;
- expectEquals(e, a[i]);
- expectEquals(i, b[i]);
- }
+ }
+
+ static void testStencil2() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ for (int i = 0; i < 100; i++) {
+ a[i] = 0;
+ b[i] = i;
}
+ stencilSubInt(a, b, 100);
+ for (int i = 1; i < 99; i++) {
+ int e = i + i + i;
+ expectEquals(e, a[i]);
+ expectEquals(i, b[i]);
+ }
+ }
+
+ static void testStencil3() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ for (int i = 0; i < 100; i++) {
+ a[i] = 0;
+ b[i] = i;
+ }
+ stencilAddInt(a, b, 100);
+ for (int i = 1; i < 99; i++) {
+ int e = i + i + i;
+ expectEquals(e, a[i]);
+ expectEquals(i, b[i]);
+ }
+ }
+
+ static void testTypes() {
+ int[] a = new int[100];
+ int[] b = new int[100];
+ long[] l = { 3 };
+ expectEquals(31, longInductionReduction(l));
+ intVectorLongInvariant(a, l);
+ for (int i = 0; i < 100; i++) {
+ expectEquals(3, a[i]);
+ }
+ longCanBeDoneWithInt(b, a);
+ for (int i = 0; i < 100; i++) {
+ expectEquals(4, b[i]);
+ }
+ }
+
+ public static void main(String[] args) {
+ testUnroll();
+ testStencil1();
+ testStencil2();
+ testStencil3();
+ testTypes();
System.out.println("passed");
}
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
private static void expectEquals(float expected, float result) {
if (expected != result) {
throw new Error("Expected: " + expected + ", found: " + result);
diff --git a/test/660-checker-simd-sad-byte/expected.txt b/test/660-checker-simd-sad-byte/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-byte/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-byte/info.txt b/test/660-checker-simd-sad-byte/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-byte/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-byte/src/Main.java b/test/660-checker-simd-sad-byte/src/Main.java
new file mode 100644
index 0000000..72d1c24
--- /dev/null
+++ b/test/660-checker-simd-sad-byte/src/Main.java
@@ -0,0 +1,332 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ // TODO: lower precision still coming, b/64091002
+
+ private static byte sadByte2Byte(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ byte sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(b1[i] - b2[i]);
+ }
+ return sad;
+ }
+
+ private static byte sadByte2ByteAlt(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ byte sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ private static byte sadByte2ByteAlt2(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ byte sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ private static short sadByte2Short(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(b1[i] - b2[i]);
+ }
+ return sad;
+ }
+
+ private static short sadByte2ShortAlt(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ private static short sadByte2ShortAlt2(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadByte2Int(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadByte2Int(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static int sadByte2Int(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(b1[i] - b2[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadByte2IntAlt(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static int sadByte2IntAlt(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadByte2IntAlt2(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static int sadByte2IntAlt2(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ byte s = b1[i];
+ byte p = b2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadByte2Long(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadByte2Long(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static long sadByte2Long(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long x = b1[i];
+ long y = b2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:b\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadByte2LongAt1(byte[], byte[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons16:i\d+>> IntConstant 16 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons16>>] loop:<<Loop>> outer_loop:none
+ private static long sadByte2LongAt1(byte[] b1, byte[] b2) {
+ int min_length = Math.min(b1.length, b2.length);
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long x = b1[i];
+ long y = b2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+ // Cross-test the two most extreme values individually.
+ byte[] b1 = { 0, -128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ byte[] b2 = { 0, 127, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(-1, sadByte2Byte(b1, b2));
+ expectEquals(-1, sadByte2Byte(b2, b1));
+ expectEquals(-1, sadByte2ByteAlt(b1, b2));
+ expectEquals(-1, sadByte2ByteAlt(b2, b1));
+ expectEquals(-1, sadByte2ByteAlt2(b1, b2));
+ expectEquals(-1, sadByte2ByteAlt2(b2, b1));
+ expectEquals(255, sadByte2Short(b1, b2));
+ expectEquals(255, sadByte2Short(b2, b1));
+ expectEquals(255, sadByte2ShortAlt(b1, b2));
+ expectEquals(255, sadByte2ShortAlt(b2, b1));
+ expectEquals(255, sadByte2ShortAlt2(b1, b2));
+ expectEquals(255, sadByte2ShortAlt2(b2, b1));
+ expectEquals(255, sadByte2Int(b1, b2));
+ expectEquals(255, sadByte2Int(b2, b1));
+ expectEquals(255, sadByte2IntAlt(b1, b2));
+ expectEquals(255, sadByte2IntAlt(b2, b1));
+ expectEquals(255, sadByte2IntAlt2(b1, b2));
+ expectEquals(255, sadByte2IntAlt2(b2, b1));
+ expectEquals(255, sadByte2Long(b1, b2));
+ expectEquals(255L, sadByte2Long(b2, b1));
+ expectEquals(256L, sadByte2LongAt1(b1, b2));
+ expectEquals(256L, sadByte2LongAt1(b2, b1));
+
+ // Use cross-values to test all cases.
+ // One for scalar cleanup.
+ int n = 256;
+ int m = n * n + 1;
+ int k = 0;
+ b1 = new byte[m];
+ b2 = new byte[m];
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ b1[k] = (byte) i;
+ b2[k] = (byte) j;
+ k++;
+ }
+ }
+ b1[k] = 10;
+ b2[k] = 2;
+ expectEquals(8, sadByte2Byte(b1, b2));
+ expectEquals(8, sadByte2ByteAlt(b1, b2));
+ expectEquals(8, sadByte2ByteAlt2(b1, b2));
+ expectEquals(21768, sadByte2Short(b1, b2));
+ expectEquals(21768, sadByte2ShortAlt(b1, b2));
+ expectEquals(21768, sadByte2ShortAlt2(b1, b2));
+ expectEquals(5592328, sadByte2Int(b1, b2));
+ expectEquals(5592328, sadByte2IntAlt(b1, b2));
+ expectEquals(5592328, sadByte2IntAlt2(b1, b2));
+ expectEquals(5592328L, sadByte2Long(b1, b2));
+ expectEquals(5592329L, sadByte2LongAt1(b1, b2));
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-char/expected.txt b/test/660-checker-simd-sad-char/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-char/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-char/info.txt b/test/660-checker-simd-sad-char/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-char/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-char/src/Main.java b/test/660-checker-simd-sad-char/src/Main.java
new file mode 100644
index 0000000..bb0c58f
--- /dev/null
+++ b/test/660-checker-simd-sad-char/src/Main.java
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ // TODO: lower precision still coming, b/64091002
+
+ // TODO: consider unsigned SAD too, b/64091002
+
+ private static char sadShort2Short(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ char sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]);
+ }
+ return sad;
+ }
+
+ private static char sadShort2ShortAlt(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ char sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ private static char sadShort2ShortAlt2(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ char sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2Int(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2Int(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadShort2Int(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadShort2IntAlt(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt2(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadShort2IntAlt2(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ char s = s1[i];
+ char p = s2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2Long(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2Long(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static long sadShort2Long(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i];
+ long y = s2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2LongAt1(char[], char[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:c\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2LongAt1(char[], char[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static long sadShort2LongAt1(char[] s1, char[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i];
+ long y = s2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+ // Cross-test the two most extreme values individually.
+ char[] s1 = { 0, 0x8000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ char[] s2 = { 0, 0x7fff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(1, sadShort2Short(s1, s2));
+ expectEquals(1, sadShort2Short(s2, s1));
+ expectEquals(1, sadShort2ShortAlt(s1, s2));
+ expectEquals(1, sadShort2ShortAlt(s2, s1));
+ expectEquals(1, sadShort2ShortAlt2(s1, s2));
+ expectEquals(1, sadShort2ShortAlt2(s2, s1));
+ expectEquals(1, sadShort2Int(s1, s2));
+ expectEquals(1, sadShort2Int(s2, s1));
+ expectEquals(1, sadShort2IntAlt(s1, s2));
+ expectEquals(1, sadShort2IntAlt(s2, s1));
+ expectEquals(1, sadShort2IntAlt2(s1, s2));
+ expectEquals(1, sadShort2IntAlt2(s2, s1));
+ expectEquals(1L, sadShort2Long(s1, s2));
+ expectEquals(1L, sadShort2Long(s2, s1));
+ expectEquals(2L, sadShort2LongAt1(s1, s2));
+ expectEquals(2L, sadShort2LongAt1(s2, s1));
+
+ // Use cross-values to test all cases.
+ char[] interesting = {
+ (char) 0x0000,
+ (char) 0x0001,
+ (char) 0x0002,
+ (char) 0x1234,
+ (char) 0x8000,
+ (char) 0x8001,
+ (char) 0x7fff,
+ (char) 0xffff
+ };
+ int n = interesting.length;
+ int m = n * n + 1;
+ s1 = new char[m];
+ s2 = new char[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ s1[k] = interesting[i];
+ s2[k] = interesting[j];
+ k++;
+ }
+ }
+ s1[k] = 10;
+ s2[k] = 2;
+ expectEquals(56196, sadShort2Short(s1, s2));
+ expectEquals(56196, sadShort2ShortAlt(s1, s2));
+ expectEquals(56196, sadShort2ShortAlt2(s1, s2));
+ expectEquals(1497988, sadShort2Int(s1, s2));
+ expectEquals(1497988, sadShort2IntAlt(s1, s2));
+ expectEquals(1497988, sadShort2IntAlt2(s1, s2));
+ expectEquals(1497988L, sadShort2Long(s1, s2));
+ expectEquals(1497989L, sadShort2LongAt1(s1, s2));
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-int/expected.txt b/test/660-checker-simd-sad-int/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-int/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-int/info.txt b/test/660-checker-simd-sad-int/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-int/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-int/src/Main.java b/test/660-checker-simd-sad-int/src/Main.java
new file mode 100644
index 0000000..0daeedd
--- /dev/null
+++ b/test/660-checker-simd-sad-int/src/Main.java
@@ -0,0 +1,248 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ /// CHECK-START: int Main.sadInt2Int(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadInt2Int(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static int sadInt2Int(int[] x, int[] y) {
+ int min_length = Math.min(x.length, y.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(x[i] - y[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub1:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub2:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Select:i\d+>> Select [<<Sub2>>,<<Sub1>>,{{z\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Select>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ // No ABS? No SAD!
+ //
+ /// CHECK-START-ARM64: int Main.sadInt2IntAlt(int[], int[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static int sadInt2IntAlt(int[] x, int[] y) {
+ int min_length = Math.min(x.length, y.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ int s = x[i];
+ int p = y[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadInt2IntAlt2(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static int sadInt2IntAlt2(int[] x, int[] y) {
+ int min_length = Math.min(x.length, y.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ int s = x[i];
+ int p = y[i];
+ int m = s - p;
+ if (m < 0) m = -m;
+ sad += m;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadInt2Long(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadInt2Long(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static long sadInt2Long(int[] x, int[] y) {
+ int min_length = Math.min(x.length, y.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i];
+ long p = y[i];
+ sad += Math.abs(s - p);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadInt2LongAt1(int[], int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop>> outer_loop:none
+ private static long sadInt2LongAt1(int[] x, int[] y) {
+ int min_length = Math.min(x.length, y.length);
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i];
+ long p = y[i];
+ sad += Math.abs(s - p);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+ // Cross-test the two most extreme values individually.
+ int[] x = { 0, Integer.MAX_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ int[] y = { 0, Integer.MIN_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(1, sadInt2Int(x, y));
+ expectEquals(1, sadInt2Int(y, x));
+ expectEquals(-1, sadInt2IntAlt(x, y));
+ expectEquals(-1, sadInt2IntAlt(y, x));
+ expectEquals(1, sadInt2IntAlt2(x, y));
+ expectEquals(1, sadInt2IntAlt2(y, x));
+ expectEquals(4294967295L, sadInt2Long(x, y));
+ expectEquals(4294967295L, sadInt2Long(y, x));
+ expectEquals(4294967296L, sadInt2LongAt1(x, y));
+ expectEquals(4294967296L, sadInt2LongAt1(y, x));
+
+ // Use cross-values for the interesting values.
+ int[] interesting = {
+ 0x00000000, 0x00000001, 0x00007fff, 0x00008000, 0x00008001, 0x0000ffff,
+ 0x00010000, 0x00010001, 0x00017fff, 0x00018000, 0x00018001, 0x0001ffff,
+ 0x7fff0000, 0x7fff0001, 0x7fff7fff, 0x7fff8000, 0x7fff8001, 0x7fffffff,
+ 0x80000000, 0x80000001, 0x80007fff, 0x80008000, 0x80008001, 0x8000ffff,
+ 0x80010000, 0x80010001, 0x80017fff, 0x80018000, 0x80018001, 0x8001ffff,
+ 0xffff0000, 0xffff0001, 0xffff7fff, 0xffff8000, 0xffff8001, 0xffffffff
+ };
+ int n = interesting.length;
+ int m = n * n + 1;
+ x = new int[m];
+ y = new int[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ x[k] = interesting[i];
+ y[k] = interesting[j];
+ k++;
+ }
+ }
+ x[k] = 10;
+ y[k] = 2;
+ expectEquals(8, sadInt2Int(x, y));
+ expectEquals(-13762600, sadInt2IntAlt(x, y));
+ expectEquals(8, sadInt2IntAlt2(x, y));
+ expectEquals(2010030931928L, sadInt2Long(x, y));
+ expectEquals(2010030931929L, sadInt2LongAt1(x, y));
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-long/expected.txt b/test/660-checker-simd-sad-long/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-long/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-long/info.txt b/test/660-checker-simd-sad-long/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-long/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-long/src/Main.java b/test/660-checker-simd-sad-long/src/Main.java
new file mode 100644
index 0000000..06f62bd
--- /dev/null
+++ b/test/660-checker-simd-sad-long/src/Main.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ /// CHECK-START: long Main.sadLong2Long(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadLong2Long(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ private static long sadLong2Long(long[] x, long[] y) {
+ int min_length = Math.min(x.length, y.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(x[i] - y[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadLong2LongAlt(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub1:j\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub2:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Select:j\d+>> Select [<<Sub2>>,<<Sub1>>,{{z\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Select>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ // No ABS? No SAD!
+ //
+ /// CHECK-START: long Main.sadLong2LongAlt(long[], long[]) loop_optimization (after)
+ /// CHECK-NOT: VecSADAccumulate
+ private static long sadLong2LongAlt(long[] x, long[] y) {
+ int min_length = Math.min(x.length, y.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i];
+ long p = y[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadLong2LongAlt2(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ private static long sadLong2LongAlt2(long[] x, long[] y) {
+ int min_length = Math.min(x.length, y.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long s = x[i];
+ long p = y[i];
+ long m = s - p;
+ if (m < 0) m = -m;
+ sad += m;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:j\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadLong2LongAt1(long[], long[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons2:i\d+>> IntConstant 2 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons2>>] loop:<<Loop>> outer_loop:none
+ private static long sadLong2LongAt1(long[] x, long[] y) {
+ int min_length = Math.min(x.length, y.length);
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(x[i] - y[i]);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+ // Cross-test the two most extreme values individually.
+ long[] x = { 0, Long.MIN_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ long[] y = { 0, Long.MAX_VALUE, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(1L, sadLong2Long(x, y));
+ expectEquals(1L, sadLong2Long(y, x));
+ expectEquals(-1L, sadLong2LongAlt(x, y));
+ expectEquals(-1L, sadLong2LongAlt(y, x));
+ expectEquals(1L, sadLong2LongAlt2(x, y));
+ expectEquals(1L, sadLong2LongAlt2(y, x));
+ expectEquals(2L, sadLong2LongAt1(x, y));
+ expectEquals(2L, sadLong2LongAt1(y, x));
+
+ // Use cross-values for the interesting values.
+ long[] interesting = {
+ 0x0000000000000000L, 0x0000000000000001L, 0x000000007fffffffL,
+ 0x0000000080000000L, 0x0000000080000001L, 0x00000000ffffffffL,
+ 0x0000000100000000L, 0x0000000100000001L, 0x000000017fffffffL,
+ 0x0000000180000000L, 0x0000000180000001L, 0x00000001ffffffffL,
+ 0x7fffffff00000000L, 0x7fffffff00000001L, 0x7fffffff7fffffffL,
+ 0x7fffffff80000000L, 0x7fffffff80000001L, 0x7fffffffffffffffL,
+ 0x8000000000000000L, 0x8000000000000001L, 0x800000007fffffffL,
+ 0x8000000080000000L, 0x8000000080000001L, 0x80000000ffffffffL,
+ 0x8000000100000000L, 0x8000000100000001L, 0x800000017fffffffL,
+ 0x8000000180000000L, 0x8000000180000001L, 0x80000001ffffffffL,
+ 0xffffffff00000000L, 0xffffffff00000001L, 0xffffffff7fffffffL,
+ 0xffffffff80000000L, 0xffffffff80000001L, 0xffffffffffffffffL
+ };
+ int n = interesting.length;
+ int m = n * n + 1;
+ x = new long[m];
+ y = new long[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ x[k] = interesting[i];
+ y[k] = interesting[j];
+ k++;
+ }
+ }
+ x[k] = 10;
+ y[k] = 2;
+ expectEquals(8L, sadLong2Long(x, y));
+ expectEquals(-901943132200L, sadLong2LongAlt(x, y));
+ expectEquals(8L, sadLong2LongAlt2(x, y));
+ expectEquals(9L, sadLong2LongAt1(x, y));
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/660-checker-simd-sad-short/expected.txt b/test/660-checker-simd-sad-short/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/660-checker-simd-sad-short/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/660-checker-simd-sad-short/info.txt b/test/660-checker-simd-sad-short/info.txt
new file mode 100644
index 0000000..b56c119
--- /dev/null
+++ b/test/660-checker-simd-sad-short/info.txt
@@ -0,0 +1 @@
+Functional tests on SAD vectorization.
diff --git a/test/660-checker-simd-sad-short/src/Main.java b/test/660-checker-simd-sad-short/src/Main.java
new file mode 100644
index 0000000..d94308e
--- /dev/null
+++ b/test/660-checker-simd-sad-short/src/Main.java
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for SAD (sum of absolute differences).
+ */
+public class Main {
+
+ // TODO: lower precision still coming, b/64091002
+
+ private static short sadShort2Short(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]);
+ }
+ return sad;
+ }
+
+ private static short sadShort2ShortAlt(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ private static short sadShort2ShortAlt2(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ short sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2Int(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2Int(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2Int(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ sad += Math.abs(s1[i] - s2[i]);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get2>>,<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load2>>,<<Load1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntAlt(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ sad += s >= p ? s - p : p - s;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:i\d+>> Sub [<<Get1>>,<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:i\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsInt loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: int Main.sadShort2IntAlt2(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static int sadShort2IntAlt2(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ int sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ short s = s1[i];
+ short p = s2[i];
+ int x = s - p;
+ if (x < 0) x = -x;
+ sad += x;
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2Long(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2Long(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 0 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static long sadShort2Long(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ long sad = 0;
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i];
+ long y = s2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ /// CHECK-START: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:j\d+>> Phi [<<ConsL>>,{{j\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get1:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Get2:s\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv1:j\d+>> TypeConversion [<<Get1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Cnv2:j\d+>> TypeConversion [<<Get2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Sub:j\d+>> Sub [<<Cnv1>>,<<Cnv2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Intrin:j\d+>> InvokeStaticOrDirect [<<Sub>>] intrinsic:MathAbsLong loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Intrin>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop>> outer_loop:none
+ //
+ /// CHECK-START-ARM64: long Main.sadShort2LongAt1(short[], short[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons8:i\d+>> IntConstant 8 loop:none
+ /// CHECK-DAG: <<ConsL:j\d+>> LongConstant 1 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<ConsL>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: <<SAD:d\d+>> VecSADAccumulate [<<Phi2>>,<<Load1>>,<<Load2>>] loop:<<Loop>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons8>>] loop:<<Loop>> outer_loop:none
+ private static long sadShort2LongAt1(short[] s1, short[] s2) {
+ int min_length = Math.min(s1.length, s2.length);
+ long sad = 1; // starts at 1
+ for (int i = 0; i < min_length; i++) {
+ long x = s1[i];
+ long y = s2[i];
+ sad += Math.abs(x - y);
+ }
+ return sad;
+ }
+
+ public static void main(String[] args) {
+ // Cross-test the two most extreme values individually.
+ short[] s1 = { 0, -32768, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ short[] s2 = { 0, 32767, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+ expectEquals(-1, sadShort2Short(s1, s2));
+ expectEquals(-1, sadShort2Short(s2, s1));
+ expectEquals(-1, sadShort2ShortAlt(s1, s2));
+ expectEquals(-1, sadShort2ShortAlt(s2, s1));
+ expectEquals(-1, sadShort2ShortAlt2(s1, s2));
+ expectEquals(-1, sadShort2ShortAlt2(s2, s1));
+ expectEquals(65535, sadShort2Int(s1, s2));
+ expectEquals(65535, sadShort2Int(s2, s1));
+ expectEquals(65535, sadShort2IntAlt(s1, s2));
+ expectEquals(65535, sadShort2IntAlt(s2, s1));
+ expectEquals(65535, sadShort2IntAlt2(s1, s2));
+ expectEquals(65535, sadShort2IntAlt2(s2, s1));
+ expectEquals(65535L, sadShort2Long(s1, s2));
+ expectEquals(65535L, sadShort2Long(s2, s1));
+ expectEquals(65536L, sadShort2LongAt1(s1, s2));
+ expectEquals(65536L, sadShort2LongAt1(s2, s1));
+
+ // Use cross-values to test all cases.
+ short[] interesting = {
+ (short) 0x0000,
+ (short) 0x0001,
+ (short) 0x0002,
+ (short) 0x1234,
+ (short) 0x8000,
+ (short) 0x8001,
+ (short) 0x7fff,
+ (short) 0xffff
+ };
+ int n = interesting.length;
+ int m = n * n + 1;
+ s1 = new short[m];
+ s2 = new short[m];
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ s1[k] = interesting[i];
+ s2[k] = interesting[j];
+ k++;
+ }
+ }
+ s1[k] = 10;
+ s2[k] = 2;
+ expectEquals(-18932, sadShort2Short(s1, s2));
+ expectEquals(-18932, sadShort2ShortAlt(s1, s2));
+ expectEquals(-18932, sadShort2ShortAlt2(s1, s2));
+ expectEquals(1291788, sadShort2Int(s1, s2));
+ expectEquals(1291788, sadShort2IntAlt(s1, s2));
+ expectEquals(1291788, sadShort2IntAlt2(s1, s2));
+ expectEquals(1291788L, sadShort2Long(s1, s2));
+ expectEquals(1291789L, sadShort2LongAt1(s1, s2));
+
+ System.out.println("passed");
+ }
+
+ private static void expectEquals(int expected, int result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+
+ private static void expectEquals(long expected, long result) {
+ if (expected != result) {
+ throw new Error("Expected: " + expected + ", found: " + result);
+ }
+ }
+}
diff --git a/test/661-checker-simd-reduc/src/Main.java b/test/661-checker-simd-reduc/src/Main.java
index 71eb3cd..bcfa968 100644
--- a/test/661-checker-simd-reduc/src/Main.java
+++ b/test/661-checker-simd-reduc/src/Main.java
@@ -80,6 +80,101 @@
return sum;
}
+ /// CHECK-START: int Main.reductionIntChain() loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons1>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Get1:i\d+>> ArrayGet [{{l\d+}},<<Phi2>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Get1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Cons1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Phi1>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Get2:i\d+>> ArrayGet [{{l\d+}},<<Phi3>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi4>>,<<Get2>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi3>>,<<Cons1>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Return [<<Phi4>>] loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ /// CHECK-START-ARM64: int Main.reductionIntChain() loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set1:d\d+>> VecSetScalars [<<Cons1>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set1>>,{{d\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Red1:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr1:i\d+>> VecExtractScalar [<<Red1>>] loop:none
+ /// CHECK-DAG: <<Set2:d\d+>> VecSetScalars [<<Extr1>>] loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:d\d+>> Phi [<<Set2>>,{{d\d+}}] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Load2:d\d+>> VecLoad [{{l\d+}},<<Phi3>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi4>>,<<Load2>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi3>>,<<Cons4>>] loop:<<Loop2>> outer_loop:none
+ /// CHECK-DAG: <<Red2:d\d+>> VecReduce [<<Phi4>>] loop:none
+ /// CHECK-DAG: <<Extr2:i\d+>> VecExtractScalar [<<Red2>>] loop:none
+ /// CHECK-DAG: Return [<<Extr2>>] loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ // NOTE: pattern is robust with respect to vector loop unrolling.
+ private static int reductionIntChain() {
+ int[] x = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+ int r = 1;
+ for (int i = 0; i < 16; i++) {
+ r += x[i];
+ }
+ for (int i = 0; i < 16; i++) {
+ r += x[i];
+ }
+ return r;
+ }
+
+ /// CHECK-START: int Main.reductionIntToLoop(int[]) loop_optimization (before)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Get:i\d+>> ArrayGet [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi2>>,<<Get>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Phi2>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Phi2>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ /// CHECK-START-ARM64: int Main.reductionIntToLoop(int[]) loop_optimization (after)
+ /// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
+ /// CHECK-DAG: <<Cons1:i\d+>> IntConstant 1 loop:none
+ /// CHECK-DAG: <<Cons4:i\d+>> IntConstant 4 loop:none
+ /// CHECK-DAG: <<Set:d\d+>> VecSetScalars [<<Cons0>>] loop:none
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi [<<Cons0>>,{{i\d+}}] loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:d\d+>> Phi [<<Set>>,{{d\d+}}] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Load1:d\d+>> VecLoad [{{l\d+}},<<Phi1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: VecAdd [<<Phi2>>,<<Load1>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: Add [<<Phi1>>,<<Cons4>>] loop:<<Loop1>> outer_loop:none
+ /// CHECK-DAG: <<Red:d\d+>> VecReduce [<<Phi2>>] loop:none
+ /// CHECK-DAG: <<Extr:i\d+>> VecExtractScalar [<<Red>>] loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi [<<Extr>>,{{i\d+}}] loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi [<<Extr>>,{{i\d+}}] loop:<<Loop2>> outer_loop:none
+ //
+ /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+ //
+ private static int reductionIntToLoop(int[] x) {
+ int r = 0;
+ for (int i = 0; i < 4; i++) {
+ r += x[i];
+ }
+ for (int i = r; i < 16; i++) {
+ r += i;
+ }
+ return r;
+ }
+
/// CHECK-START: long Main.reductionLong(long[]) loop_optimization (before)
/// CHECK-DAG: <<Cons0:i\d+>> IntConstant 0 loop:none
/// CHECK-DAG: <<Long0:j\d+>> LongConstant 0 loop:none
@@ -468,10 +563,28 @@
}
// Test various reductions in loops.
+ int[] x0 = { 0, 0, 0, 0 };
+ int[] x1 = { 0, 0, 0, 1 };
+ int[] x2 = { 1, 1, 1, 1 };
expectEquals(-74, reductionByte(xb));
expectEquals(-27466, reductionShort(xs));
expectEquals(38070, reductionChar(xc));
expectEquals(365750, reductionInt(xi));
+ expectEquals(273, reductionIntChain());
+ expectEquals(120, reductionIntToLoop(x0));
+ expectEquals(121, reductionIntToLoop(x1));
+ expectEquals(118, reductionIntToLoop(x2));
+ expectEquals(-1205, reductionIntToLoop(xi));
+ expectEquals(365750L, reductionLong(xl));
+ expectEquals(-75, reductionByteM1(xb));
+ expectEquals(-27467, reductionShortM1(xs));
+ expectEquals(38069, reductionCharM1(xc));
+ expectEquals(365749, reductionIntM1(xi));
+ expectEquals(365749L, reductionLongM1(xl));
+ expectEquals(74, reductionMinusByte(xb));
+ expectEquals(27466, reductionMinusShort(xs));
+ expectEquals(27466, reductionMinusChar(xc));
+ expectEquals(-365750, reductionMinusInt(xi));
expectEquals(365750L, reductionLong(xl));
expectEquals(-75, reductionByteM1(xb));
expectEquals(-27467, reductionShortM1(xs));