Merge "Faster deduplication in OatWriter."
diff --git a/Android.mk b/Android.mk
index 3f4ead6..7a95dfe 100644
--- a/Android.mk
+++ b/Android.mk
@@ -324,7 +324,7 @@
$$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(DEFAULT_DEX_PREOPT_BUILT_IMAGE) $(DEX2OATD_DEPENDENCY)
@mkdir -p $$(dir $$@)
- $(DEX2OATD) --runtime-arg $(DEX2OAT_XMS) --runtime-arg $(DEX2OAT_XMX) \
+ $(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
--boot-image=$(DEFAULT_DEX_PREOPT_BUILT_IMAGE) --dex-file=$(PRODUCT_OUT)/$(1) \
--dex-location=/$(1) --oat-file=$$@ \
--instruction-set=$(DEX2OAT_TARGET_ARCH) \
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 10cd1cc..d501b57 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -110,6 +110,7 @@
runtime/mem_map_test.cc \
runtime/mirror/dex_cache_test.cc \
runtime/mirror/object_test.cc \
+ runtime/monitor_pool_test.cc \
runtime/monitor_test.cc \
runtime/parsed_options_test.cc \
runtime/reference_table_test.cc \
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index dd87f4a..61a2cde 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -29,7 +29,7 @@
$$($(1)HOST_CORE_IMG_OUT): $$(HOST_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
@echo "host dex2oat: $$@ ($$?)"
@mkdir -p $$(dir $$@)
- $$(hide) $$(DEX2OATD) --runtime-arg $(DEX2OAT_IMAGE_XMS) --runtime-arg $(DEX2OAT_IMAGE_XMX) \
+ $$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_IMAGE_XMS) --runtime-arg -Xmx$(DEX2OAT_IMAGE_XMX) \
--image-classes=$$(PRELOADED_CLASSES) $$(addprefix --dex-file=,$$(HOST_CORE_DEX_FILES)) \
$$(addprefix --dex-location=,$$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)HOST_CORE_OAT_OUT) \
--oat-location=$$($(1)HOST_CORE_OAT) --image=$$($(1)HOST_CORE_IMG_OUT) \
@@ -57,7 +57,7 @@
$$($(1)TARGET_CORE_IMG_OUT): $$($(1)TARGET_CORE_DEX_FILES) $$(DEX2OATD_DEPENDENCY)
@echo "target dex2oat: $$@ ($$?)"
@mkdir -p $$(dir $$@)
- $$(hide) $$(DEX2OATD) --runtime-arg $(DEX2OAT_XMS) --runtime-arg $(DEX2OAT_XMX) \
+ $$(hide) $$(DEX2OATD) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) \
--image-classes=$$(PRELOADED_CLASSES) $$(addprefix --dex-file=,$$(TARGET_CORE_DEX_FILES)) \
$$(addprefix --dex-location=,$$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$$($(1)TARGET_CORE_OAT_OUT) \
--oat-location=$$($(1)TARGET_CORE_OAT) --image=$$($(1)TARGET_CORE_IMG_OUT) \
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index dafefea..b31e9a2 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -766,8 +766,9 @@
// Generate code for all slow paths.
void Mir2Lir::HandleSlowPaths() {
- int n = slow_paths_.Size();
- for (int i = 0; i < n; ++i) {
+ // We should check slow_paths_.Size() every time, because a new slow path
+ // may be created during slowpath->Compile().
+ for (size_t i = 0; i < slow_paths_.Size(); ++i) {
LIRSlowPath* slowpath = slow_paths_.Get(i);
slowpath->Compile();
}
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 4e973d8..8df5b6d 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -327,6 +327,13 @@
{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+// This is a special encoding with r8_form on the second register only
+// for Movzx8 and Movsx8.
+#define EXT_0F_R8_FORM_ENCODING_MAP(opname, prefix, opcode, reg_def) \
+{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, true }, #opname "RR", "!0r,!1r" }, \
+{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
+{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" }
+
#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \
{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RR", "!0r,!1r" }, \
{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0, false }, #opname "RM", "!0r,[!1r+!2d]" }, \
@@ -488,9 +495,9 @@
{ kX86LockCmpxchg64A, kArray, IS_STORE | IS_QUAD_OP | REG_USE01 | REG_DEFAD_USEAD | REG_USEC | REG_USEB | SETS_CCODES, { 0xF0, 0, 0x0F, 0xC7, 0, 1, 0, 0, false }, "Lock Cmpxchg8b", "[!0r+!1r<<!2d+!3d]" },
{ kX86XchgMR, kMemReg, IS_STORE | IS_LOAD | IS_TERTIARY_OP | REG_DEF2 | REG_USE02, { 0, 0, 0x87, 0, 0, 0, 0, 0, false }, "Xchg", "[!0r+!1d],!2r" },
- EXT_0F_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0),
+ EXT_0F_R8_FORM_ENCODING_MAP(Movzx8, 0x00, 0xB6, REG_DEF0),
EXT_0F_ENCODING_MAP(Movzx16, 0x00, 0xB7, REG_DEF0),
- EXT_0F_ENCODING_MAP(Movsx8, 0x00, 0xBE, REG_DEF0),
+ EXT_0F_R8_FORM_ENCODING_MAP(Movsx8, 0x00, 0xBE, REG_DEF0),
EXT_0F_ENCODING_MAP(Movsx16, 0x00, 0xBF, REG_DEF0),
EXT_0F_ENCODING_MAP(Movzx8q, REX_W, 0xB6, REG_DEF0),
EXT_0F_ENCODING_MAP(Movzx16q, REX_W, 0xB7, REG_DEF0),
@@ -593,6 +600,10 @@
}
}
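+// For Movzx8 and Movsx8 the byte register is the second (source) operand, so the byte-register
+// checks and REX normalization that normally target the first operand are skipped for them.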
+static bool IsByteSecondOperand(const X86EncodingMap* entry) {
+ return StartsWith(entry->name, "Movzx8") || StartsWith(entry->name, "Movsx8");
+}
+
size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_index,
int32_t raw_base, int32_t displacement) {
bool has_modrm = HasModrm(entry);
@@ -613,7 +624,8 @@
bool registers_need_rex_prefix = NeedsRex(raw_reg) || NeedsRex(raw_index) || NeedsRex(raw_base);
if (r8_form) {
// Do we need an empty REX prefix to normalize byte registers?
- registers_need_rex_prefix = registers_need_rex_prefix || (RegStorage::RegNum(raw_reg) >= 4);
+ registers_need_rex_prefix = registers_need_rex_prefix ||
+ (RegStorage::RegNum(raw_reg) >= 4 && !IsByteSecondOperand(entry));
registers_need_rex_prefix = registers_need_rex_prefix ||
(modrm_is_reg_reg && (RegStorage::RegNum(raw_base) >= 4));
}
@@ -877,7 +889,7 @@
uint8_t rex = 0;
if (r8_form) {
// Do we need an empty REX prefix to normalize byte register addressing?
- if (RegStorage::RegNum(raw_reg_r) >= 4) {
+ if (RegStorage::RegNum(raw_reg_r) >= 4 && !IsByteSecondOperand(entry)) {
rex |= 0x40; // REX.0000
} else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) {
rex |= 0x40; // REX.0000
@@ -1167,7 +1179,9 @@
}
void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
- CheckValidByteRegister(entry, raw_reg1);
+ if (!IsByteSecondOperand(entry)) {
+ CheckValidByteRegister(entry, raw_reg1);
+ }
CheckValidByteRegister(entry, raw_reg2);
EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2);
uint8_t low_reg1 = LowRegisterBits(raw_reg1);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index cf29e52..f1166f6 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -761,54 +761,59 @@
}
bool X86Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
- return false;
-// Turned off until tests available in Art.
-//
-// RegLocation rl_src_address = info->args[0]; // long address
-// RegLocation rl_address;
-// if (!cu_->target64) {
-// rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
-// rl_address = LoadValue(rl_src_address, kCoreReg);
-// } else {
-// rl_address = LoadValueWide(rl_src_address, kCoreReg);
-// }
-// RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
-// RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
-// // Unaligned access is allowed on x86.
-// LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
-// if (size == k64) {
-// StoreValueWide(rl_dest, rl_result);
-// } else {
-// DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-// StoreValue(rl_dest, rl_result);
-// }
-// return true;
+ RegLocation rl_src_address = info->args[0]; // long address
+ RegLocation rl_address;
+ if (!cu_->target64) {
+ rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
+ rl_address = LoadValue(rl_src_address, kCoreReg);
+ } else {
+ rl_address = LoadValueWide(rl_src_address, kCoreReg);
+ }
+ RegLocation rl_dest = size == k64 ? InlineTargetWide(info) : InlineTarget(info);
+ RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+ // Unaligned access is allowed on x86.
+ LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
+ if (size == k64) {
+ StoreValueWide(rl_dest, rl_result);
+ } else {
+ DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+ StoreValue(rl_dest, rl_result);
+ }
+ return true;
}
bool X86Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
- return false;
-// Turned off until tests available in Art.
-//
-// RegLocation rl_src_address = info->args[0]; // long address
-// RegLocation rl_address;
-// if (!cu_->target64) {
-// rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
-// rl_address = LoadValue(rl_src_address, kCoreReg);
-// } else {
-// rl_address = LoadValueWide(rl_src_address, kCoreReg);
-// }
-// RegLocation rl_src_value = info->args[2]; // [size] value
-// if (size == k64) {
-// // Unaligned access is allowed on x86.
-// RegLocation rl_value = LoadValueWide(rl_src_value, kCoreReg);
-// StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-// } else {
-// DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
-// // Unaligned access is allowed on x86.
-// RegLocation rl_value = LoadValue(rl_src_value, kCoreReg);
-// StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
-// }
-// return true;
+ RegLocation rl_src_address = info->args[0]; // long address
+ RegLocation rl_address;
+ if (!cu_->target64) {
+ rl_src_address = NarrowRegLoc(rl_src_address); // ignore high half in info->args[0]
+ rl_address = LoadValue(rl_src_address, kCoreReg);
+ } else {
+ rl_address = LoadValueWide(rl_src_address, kCoreReg);
+ }
+ RegLocation rl_src_value = info->args[2]; // [size] value
+ RegLocation rl_value;
+ if (size == k64) {
+ // Unaligned access is allowed on x86.
+ rl_value = LoadValueWide(rl_src_value, kCoreReg);
+ } else {
+ DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
+    // In 32-bit mode only the EAX..EDX registers can be used with Mov8MR.
+ if (!cu_->target64 && size == kSignedByte) {
+ rl_src_value = UpdateLocTyped(rl_src_value, kCoreReg);
+ if (rl_src_value.location == kLocPhysReg && !IsByteRegister(rl_src_value.reg)) {
+ RegStorage temp = AllocateByteRegister();
+ OpRegCopy(temp, rl_src_value.reg);
+ rl_value.reg = temp;
+ } else {
+ rl_value = LoadValue(rl_src_value, kCoreReg);
+ }
+ } else {
+ rl_value = LoadValue(rl_src_value, kCoreReg);
+ }
+ }
+ StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
+ return true;
}
void X86Mir2Lir::OpLea(RegStorage r_base, RegStorage reg1, RegStorage reg2, int scale, int offset) {
@@ -831,14 +836,12 @@
bool X86Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
DCHECK(cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64);
- if (cu_->instruction_set == kX86_64) {
- return false; // TODO: Verify working on x86-64.
- }
-
// Unused - RegLocation rl_src_unsafe = info->args[0];
RegLocation rl_src_obj = info->args[1]; // Object - known non-null
RegLocation rl_src_offset = info->args[2]; // long low
- rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3]
+ if (!cu_->target64) {
+ rl_src_offset = NarrowRegLoc(rl_src_offset); // ignore high half in info->args[3]
+ }
RegLocation rl_src_expected = info->args[4]; // int, long or Object
// If is_long, high half is in info->args[5]
RegLocation rl_src_new_value = info->args[is_long ? 6 : 5]; // int, long or Object
@@ -846,21 +849,21 @@
if (is_long && cu_->target64) {
// RAX must hold expected for CMPXCHG. Neither rl_new_value, nor r_ptr may be in RAX.
- FlushReg(rs_r0);
- Clobber(rs_r0);
- LockTemp(rs_r0);
+ FlushReg(rs_r0q);
+ Clobber(rs_r0q);
+ LockTemp(rs_r0q);
RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
RegLocation rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
- RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
- LoadValueDirectWide(rl_src_expected, rs_r0);
+ RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
+ LoadValueDirectWide(rl_src_expected, rs_r0q);
NewLIR5(kX86LockCmpxchg64AR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
// After a store we need to insert barrier in case of potential load. Since the
// locked cmpxchg has full barrier semantics, only a scheduling barrier will be generated.
GenMemBarrier(kStoreLoad);
- FreeTemp(rs_r0);
+ FreeTemp(rs_r0q);
} else if (is_long) {
// TODO: avoid unnecessary loads of SI and DI when the values are in registers.
// TODO: CFI support.
@@ -942,7 +945,12 @@
LockTemp(rs_r0);
}
- RegLocation rl_offset = LoadValue(rl_src_offset, kCoreReg);
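+  // On 64-bit targets the offset is used as a full 64-bit value, so it must be loaded wide;
+  // 32-bit targets only use the low half (narrowed from the long argument above).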
+ RegLocation rl_offset;
+ if (cu_->target64) {
+ rl_offset = LoadValueWide(rl_src_offset, kCoreReg);
+ } else {
+ rl_offset = LoadValue(rl_src_offset, kCoreReg);
+ }
LoadValueDirect(rl_src_expected, rs_r0);
NewLIR5(kX86LockCmpxchgAR, rl_object.reg.GetReg(), rl_offset.reg.GetReg(), 0, 0, rl_new_value.reg.GetReg());
diff --git a/runtime/atomic.h b/runtime/atomic.h
index ed83a33..5ddafb4 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -343,6 +343,14 @@
return this->fetch_sub(value, std::memory_order_seq_cst); // Return old value.
}
+ T FetchAndOrSequentiallyConsistent(const T value) {
+ return this->fetch_or(value, std::memory_order_seq_cst); // Return old_value.
+ }
+
+ T FetchAndAndSequentiallyConsistent(const T value) {
+ return this->fetch_and(value, std::memory_order_seq_cst); // Return old_value.
+ }
+
volatile T* Address() {
return reinterpret_cast<T*>(this);
}
@@ -382,6 +390,20 @@
}
};
+// Interpret the bit pattern of input (type U) as type V. Requires sizeof(U) == sizeof(V)
+// (compile-time checked).
+// Reproduced here from utils.h to keep dependencies small.
+template<typename U, typename V>
+static inline V bit_cast_atomic(U in) {
+ COMPILE_ASSERT(sizeof(U) == sizeof(V), size_of_u_not_eq_size_of_v);
+ union {
+ U u;
+ V v;
+ } tmp;
+ tmp.u = in;
+ return tmp.v;
+}
+
template<class T> struct AtomicHelper<8, T> {
friend class Atomic<T>;
@@ -392,15 +414,14 @@
// sizeof(T) == 8
volatile const int64_t* loc_ptr =
reinterpret_cast<volatile const int64_t*>(loc);
- return static_cast<T>(QuasiAtomic::Read64(loc_ptr));
+ return bit_cast_atomic<int64_t, T>(QuasiAtomic::Read64(loc_ptr));
}
static void StoreRelaxed(volatile T* loc, T desired) {
// sizeof(T) == 8
volatile int64_t* loc_ptr =
reinterpret_cast<volatile int64_t*>(loc);
- QuasiAtomic::Write64(loc_ptr,
- static_cast<int64_t>(desired));
+ QuasiAtomic::Write64(loc_ptr, bit_cast_atomic<T, int64_t>(desired));
}
@@ -408,14 +429,14 @@
T expected_value, T desired_value) {
// sizeof(T) == 8
volatile int64_t* loc_ptr = reinterpret_cast<volatile int64_t*>(loc);
- return QuasiAtomic::Cas64(
- static_cast<int64_t>(reinterpret_cast<uintptr_t>(expected_value)),
- static_cast<int64_t>(reinterpret_cast<uintptr_t>(desired_value)), loc_ptr);
+ return QuasiAtomic::Cas64(bit_cast_atomic<T, int64_t>(expected_value),
+ bit_cast_atomic<T, int64_t>(desired_value),
+ loc_ptr);
}
};
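+// Note: PACKED(sizeof(T)) below forces the wrapped value to be aligned to its own size
+// (e.g. 8-byte alignment for Atomic<int64_t>), presumably so the 64-bit QuasiAtomic/CAS
+// paths always operate on a naturally aligned location.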
template<typename T>
-class Atomic {
+class PACKED(sizeof(T)) Atomic {
private:
COMPILE_ASSERT(sizeof(T) <= 4 || sizeof(T) == 8, bad_atomic_arg);
@@ -521,6 +542,30 @@
}
}
+ T FetchAndOrSequentiallyConsistent(const T value) {
+ if (sizeof(T) <= 4) {
+ return __sync_fetch_and_or(&value_, value); // Return old value.
+ } else {
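+      // No 8-byte __sync_fetch_and_or is used here; emulate the read-modify-write with a
+      // weak CAS retry loop and return the value observed before the update.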
+ T expected;
+ do {
+ expected = LoadRelaxed();
+ } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected | value));
+ return expected;
+ }
+ }
+
+ T FetchAndAndSequentiallyConsistent(const T value) {
+ if (sizeof(T) <= 4) {
+ return __sync_fetch_and_and(&value_, value); // Return old value.
+ } else {
+ T expected;
+ do {
+ expected = LoadRelaxed();
+ } while (!CompareExchangeWeakSequentiallyConsistent(expected, expected & value));
+ return expected;
+ }
+ }
+
T operator++() { // Prefix operator.
if (sizeof(T) <= 4) {
return __sync_add_and_fetch(&value_, 1); // Return new value.
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 1890181..3e5cdba 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -23,7 +23,6 @@
#define ATRACE_TAG ATRACE_TAG_DALVIK
-#include "cutils/atomic-inline.h"
#include "cutils/trace.h"
#include "base/stringprintf.h"
@@ -152,20 +151,20 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (LIKELY(cur_state >= 0)) {
// Add as an extra reader.
- done = android_atomic_acquire_cas(cur_state, cur_state + 1, &state_) == 0;
+ done = state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
} else {
// Owner holds it exclusively, hang up.
ScopedContentionRecorder scr(this, GetExclusiveOwnerTid(), SafeGetTid(self));
- android_atomic_inc(&num_pending_readers_);
- if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+ ++num_pending_readers_;
+ if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
if (errno != EAGAIN) {
PLOG(FATAL) << "futex wait failed for " << name_;
}
}
- android_atomic_dec(&num_pending_readers_);
+ --num_pending_readers_;
}
} while (!done);
#else
@@ -184,14 +183,18 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (LIKELY(cur_state > 0)) {
- // Reduce state by 1.
- done = android_atomic_release_cas(cur_state, cur_state - 1, &state_) == 0;
- if (done && (cur_state - 1) == 0) { // cas may fail due to noise?
- if (num_pending_writers_.LoadRelaxed() > 0 || num_pending_readers_ > 0) {
+ // Reduce state by 1 and impose lock release load/store ordering.
+      // Note, the relaxed loads below mustn't reorder before the CompareExchange.
+ // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+ // a status bit into the state on contention.
+ done = state_.CompareExchangeWeakSequentiallyConsistent(cur_state, cur_state - 1);
+ if (done && (cur_state - 1) == 0) { // Weak CAS may fail spuriously.
+ if (num_pending_writers_.LoadRelaxed() > 0 ||
+ num_pending_readers_.LoadRelaxed() > 0) {
// Wake any exclusive waiters as there are now no readers.
- futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+ futex(state_.Address(), FUTEX_WAKE, -1, NULL, NULL, 0);
}
}
} else {
@@ -233,7 +236,7 @@
inline uint64_t ReaderWriterMutex::GetExclusiveOwnerTid() const {
#if ART_USE_FUTEXES
- int32_t state = state_;
+ int32_t state = state_.LoadRelaxed();
if (state == 0) {
return 0; // No owner.
} else if (state > 0) {
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index fd1eb12..7779547 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -30,6 +30,7 @@
namespace art {
Mutex* Locks::abort_lock_ = nullptr;
+Mutex* Locks::allocated_monitor_ids_lock_ = nullptr;
Mutex* Locks::allocated_thread_ids_lock_ = nullptr;
Mutex* Locks::breakpoint_lock_ = nullptr;
ReaderWriterMutex* Locks::classlinker_classes_lock_ = nullptr;
@@ -262,7 +263,7 @@
Mutex::Mutex(const char* name, LockLevel level, bool recursive)
: BaseMutex(name, level), recursive_(recursive), recursion_count_(0) {
#if ART_USE_FUTEXES
- state_ = 0;
+ DCHECK_EQ(0, state_.LoadRelaxed());
DCHECK_EQ(0, num_contenders_.LoadRelaxed());
#else
CHECK_MUTEX_CALL(pthread_mutex_init, (&mutex_, nullptr));
@@ -272,13 +273,13 @@
Mutex::~Mutex() {
#if ART_USE_FUTEXES
- if (state_ != 0) {
+ if (state_.LoadRelaxed() != 0) {
Runtime* runtime = Runtime::Current();
bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
LOG(shutting_down ? WARNING : FATAL) << "destroying mutex with owner: " << exclusive_owner_;
} else {
CHECK_EQ(exclusive_owner_, 0U) << "unexpectedly found an owner on unlocked mutex " << name_;
- CHECK_EQ(num_contenders_.LoadRelaxed(), 0)
+ CHECK_EQ(num_contenders_.LoadSequentiallyConsistent(), 0)
<< "unexpectedly found a contender on mutex " << name_;
}
#else
@@ -305,15 +306,15 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (LIKELY(cur_state == 0)) {
- // Change state from 0 to 1.
- done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */);
+ // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
+ done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, 1 /* new state */);
} else {
// Failed to acquire, hang up.
ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
num_contenders_++;
- if (futex(&state_, FUTEX_WAIT, 1, NULL, NULL, 0) != 0) {
+ if (futex(state_.Address(), FUTEX_WAIT, 1, NULL, NULL, 0) != 0) {
// EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
// We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
if ((errno != EAGAIN) && (errno != EINTR)) {
@@ -323,11 +324,7 @@
num_contenders_--;
}
} while (!done);
- // We assert that no memory fence is needed here, since
- // __sync_bool_compare_and_swap includes it.
- // TODO: Change state_ to be a art::Atomic and use an intention revealing CAS operation
- // that exposes the ordering semantics.
- DCHECK_EQ(state_, 1);
+ DCHECK_EQ(state_.LoadRelaxed(), 1);
#else
CHECK_MUTEX_CALL(pthread_mutex_lock, (&mutex_));
#endif
@@ -352,16 +349,15 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (cur_state == 0) {
- // Change state from 0 to 1.
- done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, 1 /* new state */);
+ // Change state from 0 to 1 and impose load/store ordering appropriate for lock acquisition.
+ done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, 1 /* new state */);
} else {
return false;
}
} while (!done);
- // We again assert no memory fence is needed.
- DCHECK_EQ(state_, 1);
+ DCHECK_EQ(state_.LoadRelaxed(), 1);
#else
int result = pthread_mutex_trylock(&mutex_);
if (result == EBUSY) {
@@ -399,17 +395,19 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (LIKELY(cur_state == 1)) {
- // The __sync_bool_compare_and_swap enforces the necessary memory ordering.
// We're no longer the owner.
exclusive_owner_ = 0;
- // Change state to 0.
- done = __sync_bool_compare_and_swap(&state_, cur_state, 0 /* new state */);
+ // Change state to 0 and impose load/store ordering appropriate for lock release.
+      // Note, the relaxed loads below mustn't reorder before the CompareExchange.
+ // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+ // a status bit into the state on contention.
+ done = state_.CompareExchangeWeakSequentiallyConsistent(cur_state, 0 /* new state */);
if (LIKELY(done)) { // Spurious fail?
- // Wake a contender
+ // Wake a contender.
if (UNLIKELY(num_contenders_.LoadRelaxed() > 0)) {
- futex(&state_, FUTEX_WAKE, 1, NULL, NULL, 0);
+ futex(state_.Address(), FUTEX_WAKE, 1, NULL, NULL, 0);
}
}
} else {
@@ -459,9 +457,9 @@
ReaderWriterMutex::~ReaderWriterMutex() {
#if ART_USE_FUTEXES
- CHECK_EQ(state_, 0);
+ CHECK_EQ(state_.LoadRelaxed(), 0);
CHECK_EQ(exclusive_owner_, 0U);
- CHECK_EQ(num_pending_readers_, 0);
+ CHECK_EQ(num_pending_readers_.LoadRelaxed(), 0);
CHECK_EQ(num_pending_writers_.LoadRelaxed(), 0);
#else
// We can't use CHECK_MUTEX_CALL here because on shutdown a suspended daemon thread
@@ -484,25 +482,25 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (LIKELY(cur_state == 0)) {
- // Change state from 0 to -1.
- done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state*/, -1 /* new state */);
+ // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
+ done = state_.CompareExchangeWeakAcquire(0 /* cur_state*/, -1 /* new state */);
} else {
// Failed to acquire, hang up.
ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
- num_pending_writers_++;
- if (futex(&state_, FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
+ ++num_pending_writers_;
+ if (futex(state_.Address(), FUTEX_WAIT, cur_state, NULL, NULL, 0) != 0) {
// EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
// We don't use TEMP_FAILURE_RETRY so we can intentionally retry to acquire the lock.
if ((errno != EAGAIN) && (errno != EINTR)) {
PLOG(FATAL) << "futex wait failed for " << name_;
}
}
- num_pending_writers_--;
+ --num_pending_writers_;
}
} while (!done);
- DCHECK_EQ(state_, -1);
+ DCHECK_EQ(state_.LoadRelaxed(), -1);
#else
CHECK_MUTEX_CALL(pthread_rwlock_wrlock, (&rwlock_));
#endif
@@ -520,16 +518,20 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (LIKELY(cur_state == -1)) {
// We're no longer the owner.
exclusive_owner_ = 0;
- // Change state from -1 to 0.
- done = __sync_bool_compare_and_swap(&state_, -1 /* cur_state*/, 0 /* new state */);
- if (LIKELY(done)) { // cmpxchg may fail due to noise?
+ // Change state from -1 to 0 and impose load/store ordering appropriate for lock release.
+      // Note, the relaxed loads below mustn't reorder before the CompareExchange.
+ // TODO: the ordering here is non-trivial as state is split across 3 fields, fix by placing
+ // a status bit into the state on contention.
+ done = state_.CompareExchangeWeakSequentiallyConsistent(-1 /* cur_state*/, 0 /* new state */);
+ if (LIKELY(done)) { // Weak CAS may fail spuriously.
// Wake any waiters.
- if (UNLIKELY(num_pending_readers_ > 0 || num_pending_writers_.LoadRelaxed() > 0)) {
- futex(&state_, FUTEX_WAKE, -1, NULL, NULL, 0);
+ if (UNLIKELY(num_pending_readers_.LoadRelaxed() > 0 ||
+ num_pending_writers_.LoadRelaxed() > 0)) {
+ futex(state_.Address(), FUTEX_WAKE, -1, NULL, NULL, 0);
}
}
} else {
@@ -550,10 +552,10 @@
timespec end_abs_ts;
InitTimeSpec(true, CLOCK_REALTIME, ms, ns, &end_abs_ts);
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (cur_state == 0) {
- // Change state from 0 to -1.
- done = __sync_bool_compare_and_swap(&state_, 0 /* cur_state */, -1 /* new state */);
+ // Change state from 0 to -1 and impose load/store ordering appropriate for lock acquisition.
+ done = state_.CompareExchangeWeakAcquire(0 /* cur_state */, -1 /* new state */);
} else {
// Failed to acquire, hang up.
timespec now_abs_ts;
@@ -563,10 +565,10 @@
return false; // Timed out.
}
ScopedContentionRecorder scr(this, SafeGetTid(self), GetExclusiveOwnerTid());
- num_pending_writers_++;
- if (futex(&state_, FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) {
+ ++num_pending_writers_;
+ if (futex(state_.Address(), FUTEX_WAIT, cur_state, &rel_ts, NULL, 0) != 0) {
if (errno == ETIMEDOUT) {
- num_pending_writers_--;
+ --num_pending_writers_;
return false; // Timed out.
} else if ((errno != EAGAIN) && (errno != EINTR)) {
// EAGAIN and EINTR both indicate a spurious failure,
@@ -575,7 +577,7 @@
PLOG(FATAL) << "timed futex wait failed for " << name_;
}
}
- num_pending_writers_--;
+ --num_pending_writers_;
}
} while (!done);
#else
@@ -602,10 +604,10 @@
#if ART_USE_FUTEXES
bool done = false;
do {
- int32_t cur_state = state_;
+ int32_t cur_state = state_.LoadRelaxed();
if (cur_state >= 0) {
- // Add as an extra reader.
- done = __sync_bool_compare_and_swap(&state_, cur_state, cur_state + 1);
+ // Add as an extra reader and impose load/store ordering appropriate for lock acquisition.
+ done = state_.CompareExchangeWeakAcquire(cur_state, cur_state + 1);
} else {
// Owner holds it exclusively.
return false;
@@ -702,7 +704,7 @@
// mutex unlocks will awaken the requeued waiter thread.
done = futex(sequence_.Address(), FUTEX_CMP_REQUEUE, 0,
reinterpret_cast<const timespec*>(std::numeric_limits<int32_t>::max()),
- &guard_.state_, cur_sequence) != -1;
+ guard_.state_.Address(), cur_sequence) != -1;
if (!done) {
if (errno != EAGAIN) {
PLOG(FATAL) << "futex cmp requeue failed for " << name_;
@@ -831,6 +833,7 @@
DCHECK(modify_ldt_lock_ == nullptr);
}
DCHECK(abort_lock_ != nullptr);
+ DCHECK(allocated_monitor_ids_lock_ != nullptr);
DCHECK(allocated_thread_ids_lock_ != nullptr);
DCHECK(breakpoint_lock_ != nullptr);
DCHECK(classlinker_classes_lock_ != nullptr);
@@ -882,6 +885,10 @@
classlinker_classes_lock_ = new ReaderWriterMutex("ClassLinker classes lock",
current_lock_level);
+ UPDATE_CURRENT_LOCK_LEVEL(kMonitorPoolLock);
+ DCHECK(allocated_monitor_ids_lock_ == nullptr);
+ allocated_monitor_ids_lock_ = new Mutex("allocated monitor ids lock", current_lock_level);
+
UPDATE_CURRENT_LOCK_LEVEL(kAllocatedThreadIdsLock);
DCHECK(allocated_thread_ids_lock_ == nullptr);
allocated_thread_ids_lock_ = new Mutex("allocated thread ids lock", current_lock_level);
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 81e62ab..8d2cd07 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -70,7 +70,6 @@
kMarkSweepMarkStackLock,
kTransactionLogLock,
kInternTableLock,
- kMonitorPoolLock,
kDefaultMutexLevel,
kMarkSweepLargeObjectLock,
kPinTableLock,
@@ -78,6 +77,7 @@
kJdwpObjectRegistryLock,
kModifyLdtLock,
kAllocatedThreadIdsLock,
+ kMonitorPoolLock,
kClassLinkerClassesLock,
kBreakpointLock,
kMonitorLock,
@@ -226,7 +226,8 @@
}
void AssertNotHeld(const Thread* self) { AssertNotHeldExclusive(self); }
- // Id associated with exclusive owner.
+ // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
+ // than the owner.
uint64_t GetExclusiveOwnerTid() const;
// Returns how many times this Mutex has been locked, it is better to use AssertHeld/NotHeld.
@@ -239,7 +240,7 @@
private:
#if ART_USE_FUTEXES
// 0 is unheld, 1 is held.
- volatile int32_t state_;
+ AtomicInteger state_;
// Exclusive owner.
volatile uint64_t exclusive_owner_;
// Number of waiting contenders.
@@ -343,7 +344,8 @@
}
}
- // Id associated with exclusive owner.
+ // Id associated with exclusive owner. No memory ordering semantics if called from a thread other
+ // than the owner.
uint64_t GetExclusiveOwnerTid() const;
virtual void Dump(std::ostream& os) const;
@@ -351,12 +353,12 @@
private:
#if ART_USE_FUTEXES
// -1 implies held exclusive, +ve shared held by state_ many owners.
- volatile int32_t state_;
- // Exclusive owner.
+ AtomicInteger state_;
+ // Exclusive owner. Modification guarded by this mutex.
volatile uint64_t exclusive_owner_;
- // Pending readers.
- volatile int32_t num_pending_readers_;
- // Pending writers.
+ // Number of contenders waiting for a reader share.
+ AtomicInteger num_pending_readers_;
+ // Number of contenders waiting to be the writer.
AtomicInteger num_pending_writers_;
#else
pthread_rwlock_t rwlock_;
@@ -558,8 +560,10 @@
// doesn't try to hold a higher level Mutex.
#define DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_AFTER(Locks::classlinker_classes_lock_)
+ static Mutex* allocated_monitor_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+
// Guard the allocation/deallocation of thread ids.
- static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(classlinker_classes_lock_);
+ static Mutex* allocated_thread_ids_lock_ ACQUIRED_AFTER(allocated_monitor_ids_lock_);
// Guards modification of the LDT on x86.
static Mutex* modify_ldt_lock_ ACQUIRED_AFTER(allocated_thread_ids_lock_);
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index fdbc9c2..289dc1d 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -26,7 +26,7 @@
#include <fstream>
#include <memory>
-#include "../../external/icu4c/common/unicode/uvernum.h"
+#include "../../external/icu/icu4c/source/common/unicode/uvernum.h"
#include "base/macros.h"
#include "base/stl_util.h"
#include "base/stringprintf.h"
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index 6161aff..c95be01 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1996,13 +1996,14 @@
case kTerminated:
return JDWP::TS_ZOMBIE;
case kTimedWaiting:
+ case kWaitingForCheckPointsToRun:
case kWaitingForDebuggerSend:
case kWaitingForDebuggerSuspension:
case kWaitingForDebuggerToAttach:
case kWaitingForDeoptimization:
case kWaitingForGcToComplete:
- case kWaitingForCheckPointsToRun:
case kWaitingForJniOnLoad:
+ case kWaitingForMethodTracingStart:
case kWaitingForSignalCatcherOutput:
case kWaitingInMainDebuggerLoop:
case kWaitingInMainSignalCatcherLoop:
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index a1d001e..ad0a4f43 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -17,9 +17,9 @@
#ifndef ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
#define ART_RUNTIME_GC_ACCOUNTING_CARD_TABLE_INL_H_
+#include "atomic.h"
#include "base/logging.h"
#include "card_table.h"
-#include "cutils/atomic-inline.h"
#include "space_bitmap.h"
#include "utils.h"
@@ -28,18 +28,23 @@
namespace accounting {
static inline bool byte_cas(byte old_value, byte new_value, byte* address) {
+#if defined(__i386__) || defined(__x86_64__)
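+  // x86 can compare-and-swap a single byte directly, so the word-level masking fallback
+  // below is not needed on these targets.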
+ Atomic<byte>* byte_atomic = reinterpret_cast<Atomic<byte>*>(address);
+ return byte_atomic->CompareExchangeWeakRelaxed(old_value, new_value);
+#else
// Little endian means most significant byte is on the left.
const size_t shift_in_bytes = reinterpret_cast<uintptr_t>(address) % sizeof(uintptr_t);
// Align the address down.
address -= shift_in_bytes;
const size_t shift_in_bits = shift_in_bytes * kBitsPerByte;
- int32_t* word_address = reinterpret_cast<int32_t*>(address);
+ AtomicInteger* word_atomic = reinterpret_cast<AtomicInteger*>(address);
+
// Word with the byte we are trying to cas cleared.
- const int32_t cur_word = *word_address & ~(0xFF << shift_in_bits);
+ const int32_t cur_word = word_atomic->LoadRelaxed() & ~(0xFF << shift_in_bits);
const int32_t old_word = cur_word | (static_cast<int32_t>(old_value) << shift_in_bits);
const int32_t new_word = cur_word | (static_cast<int32_t>(new_value) << shift_in_bits);
- bool success = android_atomic_cas(old_word, new_word, word_address) == 0;
- return success;
+ return word_atomic->CompareExchangeWeakRelaxed(old_word, new_word);
+#endif
}
template <typename Visitor>
@@ -174,8 +179,8 @@
for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
new_bytes[i] = visitor(expected_bytes[i]);
}
- if (LIKELY(android_atomic_cas(expected_word, new_word,
- reinterpret_cast<int32_t*>(word_cur)) == 0)) {
+ Atomic<uintptr_t>* atomic_word = reinterpret_cast<Atomic<uintptr_t>*>(word_cur);
+ if (LIKELY(atomic_word->CompareExchangeWeakRelaxed(expected_word, new_word))) {
for (size_t i = 0; i < sizeof(uintptr_t); ++i) {
const byte expected_byte = expected_bytes[i];
const byte new_byte = new_bytes[i];
diff --git a/runtime/gc/accounting/space_bitmap-inl.h b/runtime/gc/accounting/space_bitmap-inl.h
index 7f1da79..1e9556a 100644
--- a/runtime/gc/accounting/space_bitmap-inl.h
+++ b/runtime/gc/accounting/space_bitmap-inl.h
@@ -21,6 +21,7 @@
#include <memory>
+#include "atomic.h"
#include "base/logging.h"
#include "dex_file-inl.h"
#include "heap_bitmap.h"
@@ -43,17 +44,17 @@
const uintptr_t offset = addr - heap_begin_;
const size_t index = OffsetToIndex(offset);
const uword mask = OffsetToMask(offset);
- uword* const address = &bitmap_begin_[index];
+ Atomic<uword>* atomic_entry = reinterpret_cast<Atomic<uword>*>(&bitmap_begin_[index]);
DCHECK_LT(index, bitmap_size_ / kWordSize) << " bitmap_size_ = " << bitmap_size_;
uword old_word;
do {
- old_word = *address;
+ old_word = atomic_entry->LoadRelaxed();
// Fast path: The bit is already set.
if ((old_word & mask) != 0) {
DCHECK(Test(obj));
return true;
}
- } while (!__sync_bool_compare_and_swap(address, old_word, old_word | mask));
+ } while (!atomic_entry->CompareExchangeWeakSequentiallyConsistent(old_word, old_word | mask));
DCHECK(Test(obj));
return false;
}
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 27fb087..6d1ba87 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -60,17 +60,17 @@
// <offset> is the difference from .base to a pointer address.
// <index> is the index of .bits that contains the bit representing
// <offset>.
- static size_t OffsetToIndex(size_t offset) ALWAYS_INLINE {
+ static constexpr size_t OffsetToIndex(size_t offset) {
return offset / kAlignment / kBitsPerWord;
}
template<typename T>
- static T IndexToOffset(T index) {
+ static constexpr T IndexToOffset(T index) {
return static_cast<T>(index * kAlignment * kBitsPerWord);
}
// Bits are packed in the obvious way.
- static uword OffsetToMask(uintptr_t offset) ALWAYS_INLINE {
+ static constexpr uword OffsetToMask(uintptr_t offset) {
return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerWord);
}
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 09fb97a..722576f 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -159,7 +159,7 @@
if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
// There is a free page run at the end.
DCHECK(last_free_page_run->IsFree());
- DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+ DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
last_free_page_run_size = last_free_page_run->ByteSize(this);
} else {
// There is no free page run at the end.
@@ -248,7 +248,7 @@
// Update the page map.
size_t page_map_idx = ToPageMapIndex(res);
for (size_t i = 0; i < num_pages; i++) {
- DCHECK_EQ(page_map_[page_map_idx + i], kPageMapEmpty);
+ DCHECK(IsFreePage(page_map_idx + i));
}
switch (page_map_type) {
case kPageMapRun:
@@ -301,8 +301,7 @@
pm_part_type = kPageMapLargeObjectPart;
break;
default:
- pm_part_type = kPageMapEmpty;
- LOG(FATAL) << "Unreachable - RosAlloc::FreePages() : " << "pm_idx=" << pm_idx << ", pm_type="
+ LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << " : " << "pm_idx=" << pm_idx << ", pm_type="
<< static_cast<int>(pm_type) << ", ptr=" << std::hex
<< reinterpret_cast<intptr_t>(ptr);
return 0;
@@ -330,7 +329,7 @@
}
if (kTraceRosAlloc) {
- LOG(INFO) << "RosAlloc::FreePages() : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
+ LOG(INFO) << __PRETTY_FUNCTION__ << " : 0x" << std::hex << reinterpret_cast<intptr_t>(ptr)
<< "-0x" << (reinterpret_cast<intptr_t>(ptr) + byte_size)
<< "(" << std::dec << (num_pages * kPageSize) << ")";
}
@@ -347,7 +346,7 @@
if (!free_page_runs_.empty()) {
// Try to coalesce in the higher address direction.
if (kTraceRosAlloc) {
- LOG(INFO) << "RosAlloc::FreePages() : trying to coalesce a free page run 0x"
+      LOG(INFO) << __PRETTY_FUNCTION__ << " : trying to coalesce a free page run 0x"
<< std::hex << reinterpret_cast<uintptr_t>(fpr) << " [" << std::dec << pm_idx << "] -0x"
<< std::hex << reinterpret_cast<uintptr_t>(fpr->End(this)) << " [" << std::dec
<< (fpr->End(this) == End() ? page_map_size_ : ToPageMapIndex(fpr->End(this))) << "]";
@@ -497,27 +496,27 @@
<< ", page_map_entry=" << static_cast<int>(page_map_entry);
}
switch (page_map_[pm_idx]) {
- case kPageMapEmpty:
- LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
- return 0;
case kPageMapLargeObject:
return FreePages(self, ptr, false);
case kPageMapLargeObjectPart:
LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
return 0;
- case kPageMapRun:
case kPageMapRunPart: {
- size_t pi = pm_idx;
- DCHECK(page_map_[pi] == kPageMapRun || page_map_[pi] == kPageMapRunPart);
// Find the beginning of the run.
- while (page_map_[pi] != kPageMapRun) {
- pi--;
- DCHECK_LT(pi, capacity_ / kPageSize);
- }
- DCHECK_EQ(page_map_[pi], kPageMapRun);
- run = reinterpret_cast<Run*>(base_ + pi * kPageSize);
+ do {
+ --pm_idx;
+ DCHECK_LT(pm_idx, capacity_ / kPageSize);
+ } while (page_map_[pm_idx] != kPageMapRun);
+ // Fall-through.
+ case kPageMapRun:
+ run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
DCHECK_EQ(run->magic_num_, kMagicNum);
break;
+ case kPageMapReleased:
+ // Fall-through.
+ case kPageMapEmpty:
+ LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+ return 0;
}
default:
LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
@@ -594,7 +593,8 @@
if (kIsDebugBuild && current_run != dedicated_full_run_) {
full_runs_[idx].insert(current_run);
if (kTraceRosAlloc) {
- LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex << reinterpret_cast<intptr_t>(current_run)
+ LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
+ << reinterpret_cast<intptr_t>(current_run)
<< " into full_runs_[" << std::dec << idx << "]";
}
DCHECK(non_full_runs_[idx].find(current_run) == non_full_runs_[idx].end());
@@ -1358,6 +1358,8 @@
for (size_t i = 0; i < end; ++i) {
byte pm = page_map_[i];
switch (pm) {
+ case kPageMapReleased:
+ // Fall-through.
case kPageMapEmpty: {
FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
if (free_page_runs_.find(fpr) != free_page_runs_.end()) {
@@ -1370,8 +1372,8 @@
curr_fpr_size = fpr->ByteSize(this);
DCHECK_EQ(curr_fpr_size % kPageSize, static_cast<size_t>(0));
remaining_curr_fpr_size = curr_fpr_size - kPageSize;
- stream << "[" << i << "]=Empty (FPR start)"
- << " fpr_size=" << curr_fpr_size
+ stream << "[" << i << "]=" << (pm == kPageMapReleased ? "Released" : "Empty")
+ << " (FPR start) fpr_size=" << curr_fpr_size
<< " remaining_fpr_size=" << remaining_curr_fpr_size << std::endl;
if (remaining_curr_fpr_size == 0) {
// Reset at the end of the current free page run.
@@ -1441,43 +1443,46 @@
size_t pm_idx = RoundDownToPageMapIndex(ptr);
MutexLock mu(Thread::Current(), lock_);
switch (page_map_[pm_idx]) {
- case kPageMapEmpty:
- LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
- << reinterpret_cast<intptr_t>(ptr);
- break;
- case kPageMapLargeObject: {
- size_t num_pages = 1;
- size_t idx = pm_idx + 1;
- size_t end = page_map_size_;
- while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
- num_pages++;
- idx++;
+ case kPageMapReleased:
+ // Fall-through.
+ case kPageMapEmpty:
+ LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+ << std::hex << reinterpret_cast<intptr_t>(ptr);
+ break;
+ case kPageMapLargeObject: {
+ size_t num_pages = 1;
+ size_t idx = pm_idx + 1;
+ size_t end = page_map_size_;
+ while (idx < end && page_map_[idx] == kPageMapLargeObjectPart) {
+ num_pages++;
+ idx++;
+ }
+ return num_pages * kPageSize;
}
- return num_pages * kPageSize;
- }
- case kPageMapLargeObjectPart:
- LOG(FATAL) << "Unreachable - RosAlloc::UsableSize(): pm_idx=" << pm_idx << ", ptr=" << std::hex
- << reinterpret_cast<intptr_t>(ptr);
- break;
- case kPageMapRun:
- case kPageMapRunPart: {
- // Find the beginning of the run.
- while (page_map_[pm_idx] != kPageMapRun) {
- pm_idx--;
- DCHECK_LT(pm_idx, capacity_ / kPageSize);
+ case kPageMapLargeObjectPart:
+ LOG(FATAL) << "Unreachable - " << __PRETTY_FUNCTION__ << ": pm_idx=" << pm_idx << ", ptr="
+ << std::hex << reinterpret_cast<intptr_t>(ptr);
+ break;
+ case kPageMapRun:
+ case kPageMapRunPart: {
+ // Find the beginning of the run.
+ while (page_map_[pm_idx] != kPageMapRun) {
+ pm_idx--;
+ DCHECK_LT(pm_idx, capacity_ / kPageSize);
+ }
+ DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
+ Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
+ DCHECK_EQ(run->magic_num_, kMagicNum);
+ size_t idx = run->size_bracket_idx_;
+ size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
+ - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
+ DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
+ return IndexToBracketSize(idx);
}
- DCHECK_EQ(page_map_[pm_idx], kPageMapRun);
- Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
- DCHECK_EQ(run->magic_num_, kMagicNum);
- size_t idx = run->size_bracket_idx_;
- size_t offset_from_slot_base = reinterpret_cast<byte*>(ptr)
- - (reinterpret_cast<byte*>(run) + headerSizes[idx]);
- DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
- return IndexToBracketSize(idx);
- }
- default:
- LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
- break;
+ default: {
+ LOG(FATAL) << "Unreachable - page map type: " << page_map_[pm_idx];
+ break;
+ }
}
return 0;
}
@@ -1490,7 +1495,7 @@
if (it != free_page_runs_.rend() && (last_free_page_run = *it)->End(this) == base_ + footprint_) {
// Remove the last free page run, if any.
DCHECK(last_free_page_run->IsFree());
- DCHECK_EQ(page_map_[ToPageMapIndex(last_free_page_run)], kPageMapEmpty);
+ DCHECK(IsFreePage(ToPageMapIndex(last_free_page_run)));
DCHECK_EQ(last_free_page_run->ByteSize(this) % kPageSize, static_cast<size_t>(0));
DCHECK_EQ(last_free_page_run->End(this), base_ + footprint_);
free_page_runs_.erase(last_free_page_run);
@@ -1500,7 +1505,7 @@
size_t new_num_of_pages = new_footprint / kPageSize;
DCHECK_GE(page_map_size_, new_num_of_pages);
// Zero out the tail of the page map.
- byte* zero_begin = page_map_ + new_num_of_pages;
+ byte* zero_begin = const_cast<byte*>(page_map_) + new_num_of_pages;
byte* madvise_begin = AlignUp(zero_begin, kPageSize);
DCHECK_LE(madvise_begin, page_map_mem_map_->End());
size_t madvise_size = page_map_mem_map_->End() - madvise_begin;
@@ -1543,6 +1548,8 @@
while (i < pm_end) {
byte pm = page_map_[i];
switch (pm) {
+ case kPageMapReleased:
+ // Fall-through.
case kPageMapEmpty: {
// The start of a free page run.
FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1560,7 +1567,7 @@
size_t num_pages = fpr_size / kPageSize;
if (kIsDebugBuild) {
for (size_t j = i + 1; j < i + num_pages; ++j) {
- DCHECK_EQ(page_map_[j], kPageMapEmpty);
+ DCHECK(IsFreePage(j));
}
}
i += fpr_size / kPageSize;
@@ -1672,7 +1679,7 @@
full_runs_[idx].insert(run);
DCHECK(full_runs_[idx].find(run) != full_runs_[idx].end());
if (kTraceRosAlloc) {
- LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex
+ LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
<< reinterpret_cast<intptr_t>(run)
<< " into full_runs_[" << std::dec << idx << "]";
}
@@ -1685,7 +1692,7 @@
non_full_runs_[idx].insert(run);
DCHECK(non_full_runs_[idx].find(run) != non_full_runs_[idx].end());
if (kTraceRosAlloc) {
- LOG(INFO) << __FUNCTION__ << " : Inserted run 0x" << std::hex
+ LOG(INFO) << __PRETTY_FUNCTION__ << " : Inserted run 0x" << std::hex
<< reinterpret_cast<intptr_t>(run)
<< " into non_full_runs_[" << std::dec << idx << "]";
}
@@ -1865,7 +1872,7 @@
void RosAlloc::Verify() {
Thread* self = Thread::Current();
CHECK(Locks::mutator_lock_->IsExclusiveHeld(self))
- << "The mutator locks isn't exclusively locked at RosAlloc::Verify()";
+      << "The mutator lock isn't exclusively locked at " << __PRETTY_FUNCTION__;
MutexLock mu(self, *Locks::thread_list_lock_);
ReaderMutexLock wmu(self, bulk_free_lock_);
std::vector<Run*> runs;
@@ -1876,6 +1883,8 @@
while (i < pm_end) {
byte pm = page_map_[i];
switch (pm) {
+ case kPageMapReleased:
+ // Fall-through.
case kPageMapEmpty: {
// The start of a free page run.
FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
@@ -1889,7 +1898,7 @@
CHECK_GT(num_pages, static_cast<uintptr_t>(0))
<< "A free page run size must be > 0 : " << fpr_size;
for (size_t j = i + 1; j < i + num_pages; ++j) {
- CHECK_EQ(page_map_[j], kPageMapEmpty)
+ CHECK(IsFreePage(j))
<< "A mismatch between the page map table for kPageMapEmpty "
<< " at page index " << j
<< " and the free page run size : page index range : "
@@ -2097,48 +2106,36 @@
Thread* self = Thread::Current();
size_t reclaimed_bytes = 0;
size_t i = 0;
- while (true) {
- MutexLock mu(self, lock_);
- // Check the page map size which might have changed due to grow/shrink.
- size_t pm_end = page_map_size_;
- if (i >= pm_end) {
- // Reached the end.
- break;
- }
+ // Check the page map size which might have changed due to grow/shrink.
+ while (i < page_map_size_) {
+ // Reading the page map without a lock is racy but the race is benign since it should only
+ // result in occasionally not releasing pages which we could release.
byte pm = page_map_[i];
switch (pm) {
case kPageMapEmpty: {
- // The start of a free page run. Release pages.
- FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
- DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
- size_t fpr_size = fpr->ByteSize(this);
- DCHECK(IsAligned<kPageSize>(fpr_size));
- byte* start = reinterpret_cast<byte*>(fpr);
- if (kIsDebugBuild) {
- // In the debug build, the first page of a free page run
- // contains a magic number for debugging. Exclude it.
- start = reinterpret_cast<byte*>(fpr) + kPageSize;
+ // Only lock if we have an empty page since we want to prevent other threads racing in.
+ MutexLock mu(self, lock_);
+ // Check that it's still empty after we acquired the lock since another thread could have
+ // raced in and placed an allocation here.
+ pm = page_map_[i];
+ if (LIKELY(pm == kPageMapEmpty)) {
+ // The start of a free page run. Release pages.
+ FreePageRun* fpr = reinterpret_cast<FreePageRun*>(base_ + i * kPageSize);
+ DCHECK(free_page_runs_.find(fpr) != free_page_runs_.end());
+ size_t fpr_size = fpr->ByteSize(this);
+ DCHECK(IsAligned<kPageSize>(fpr_size));
+ byte* start = reinterpret_cast<byte*>(fpr);
+ reclaimed_bytes += ReleasePageRange(start, start + fpr_size);
+ i += fpr_size / kPageSize;
+ DCHECK_LE(i, page_map_size_);
}
- byte* end = reinterpret_cast<byte*>(fpr) + fpr_size;
- if (!kMadviseZeroes) {
- memset(start, 0, end - start);
- }
- CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
- reclaimed_bytes += fpr_size;
- size_t num_pages = fpr_size / kPageSize;
- if (kIsDebugBuild) {
- for (size_t j = i + 1; j < i + num_pages; ++j) {
- DCHECK_EQ(page_map_[j], kPageMapEmpty);
- }
- }
- i += num_pages;
- DCHECK_LE(i, pm_end);
break;
}
case kPageMapLargeObject: // Fall through.
case kPageMapLargeObjectPart: // Fall through.
case kPageMapRun: // Fall through.
case kPageMapRunPart: // Fall through.
+ case kPageMapReleased: // Fall through since it is already released.
++i;
break; // Skip.
default:
@@ -2149,6 +2146,35 @@
return reclaimed_bytes;
}
+size_t RosAlloc::ReleasePageRange(byte* start, byte* end) {
+ DCHECK_ALIGNED(start, kPageSize);
+ DCHECK_ALIGNED(end, kPageSize);
+ DCHECK_LT(start, end);
+ if (kIsDebugBuild) {
+ // In the debug build, the first page of a free page run
+ // contains a magic number for debugging. Exclude it.
+ start += kPageSize;
+ }
+ if (!kMadviseZeroes) {
+ // TODO: Do this when we resurrect the page instead.
+ memset(start, 0, end - start);
+ }
+ CHECK_EQ(madvise(start, end - start, MADV_DONTNEED), 0);
+ size_t pm_idx = ToPageMapIndex(start);
+ size_t reclaimed_bytes = 0;
+  // Calculate reclaimed bytes and update the page map.
+ const size_t max_idx = pm_idx + (end - start) / kPageSize;
+ for (; pm_idx < max_idx; ++pm_idx) {
+ DCHECK(IsFreePage(pm_idx));
+ if (page_map_[pm_idx] == kPageMapEmpty) {
+ // Mark the page as released and update how many bytes we released.
+ reclaimed_bytes += kPageSize;
+ page_map_[pm_idx] = kPageMapReleased;
+ }
+ }
+ return reclaimed_bytes;
+}
+
} // namespace allocator
} // namespace gc
} // namespace art
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 13f61ec..fad0dc8 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -99,27 +99,8 @@
byte* start = reinterpret_cast<byte*>(this);
size_t byte_size = ByteSize(rosalloc);
DCHECK_EQ(byte_size % kPageSize, static_cast<size_t>(0));
- bool release_pages = ShouldReleasePages(rosalloc);
- if (kIsDebugBuild) {
- // Exclude the first page that stores the magic number.
- DCHECK_GE(byte_size, static_cast<size_t>(kPageSize));
- start += kPageSize;
- byte_size -= kPageSize;
- if (byte_size > 0) {
- if (release_pages) {
- if (!kMadviseZeroes) {
- memset(start, 0, byte_size);
- }
- madvise(start, byte_size, MADV_DONTNEED);
- }
- }
- } else {
- if (release_pages) {
- if (!kMadviseZeroes) {
- memset(start, 0, byte_size);
- }
- madvise(start, byte_size, MADV_DONTNEED);
- }
+ if (ShouldReleasePages(rosalloc)) {
+ rosalloc->ReleasePageRange(start, start + byte_size);
}
}
};
@@ -462,14 +443,15 @@
std::string size_bracket_lock_names[kNumOfSizeBrackets];
// The types of page map entries.
enum {
- kPageMapEmpty = 0, // Not allocated.
- kPageMapRun = 1, // The beginning of a run.
- kPageMapRunPart = 2, // The non-beginning part of a run.
- kPageMapLargeObject = 3, // The beginning of a large object.
- kPageMapLargeObjectPart = 4, // The non-beginning part of a large object.
+ kPageMapReleased = 0, // Zero and released back to the OS.
+ kPageMapEmpty, // Zero but probably dirty.
+ kPageMapRun, // The beginning of a run.
+ kPageMapRunPart, // The non-beginning part of a run.
+ kPageMapLargeObject, // The beginning of a large object.
+ kPageMapLargeObjectPart, // The non-beginning part of a large object.
};
// The table that indicates what pages are currently used for.
- byte* page_map_; // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
+ volatile byte* page_map_; // No GUARDED_BY(lock_) for kReadPageMapEntryWithoutLockInBulkFree.
size_t page_map_size_;
size_t max_page_map_size_;
std::unique_ptr<MemMap> page_map_mem_map_;
@@ -536,6 +518,9 @@
// Revoke the current runs which share an index with the thread local runs.
void RevokeThreadUnsafeCurrentRuns();
+ // Release a range of pages.
+ size_t ReleasePageRange(byte* start, byte* end) EXCLUSIVE_LOCKS_REQUIRED(lock_);
+
public:
RosAlloc(void* base, size_t capacity, size_t max_capacity,
PageReleaseMode page_release_mode,
@@ -588,6 +573,11 @@
static Run* GetDedicatedFullRun() {
return dedicated_full_run_;
}
+ bool IsFreePage(size_t idx) const {
+ DCHECK_LT(idx, capacity_ / kPageSize);
+ byte pm_type = page_map_[idx];
+ return pm_type == kPageMapReleased || pm_type == kPageMapEmpty;
+ }
// Callbacks for InspectAll that will count the number of bytes
// allocated and objects allocated, respectively.
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 696728b..e9adca0 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -114,7 +114,7 @@
desired_collector_type_(foreground_collector_type_),
heap_trim_request_lock_(nullptr),
last_trim_time_(0),
- heap_transition_target_time_(0),
+ heap_transition_or_trim_target_time_(0),
heap_trim_request_pending_(false),
parallel_gc_threads_(parallel_gc_threads),
conc_gc_threads_(conc_gc_threads),
@@ -850,10 +850,10 @@
MutexLock mu(self, *heap_trim_request_lock_);
desired_collector_type = desired_collector_type_;
uint64_t current_time = NanoTime();
- if (current_time >= heap_transition_target_time_) {
+ if (current_time >= heap_transition_or_trim_target_time_) {
break;
}
- wait_time = heap_transition_target_time_ - current_time;
+ wait_time = heap_transition_or_trim_target_time_ - current_time;
}
ScopedThreadStateChange tsc(self, kSleeping);
usleep(wait_time / 1000); // Usleep takes microseconds.
@@ -871,9 +871,9 @@
VLOG(heap) << "Deflating " << count << " monitors took "
<< PrettyDuration(NanoTime() - start_time);
runtime->GetThreadList()->ResumeAll();
- // Do a heap trim if it is needed.
- Trim();
}
+ // Do a heap trim if it is needed.
+ Trim();
}
void Heap::Trim() {
@@ -904,9 +904,13 @@
uint64_t managed_reclaimed = 0;
for (const auto& space : continuous_spaces_) {
if (space->IsMallocSpace()) {
- gc::space::MallocSpace* alloc_space = space->AsMallocSpace();
- total_alloc_space_size += alloc_space->Size();
- managed_reclaimed += alloc_space->Trim();
+ gc::space::MallocSpace* malloc_space = space->AsMallocSpace();
+ if (malloc_space->IsRosAllocSpace() || !CareAboutPauseTimes()) {
+ // Don't trim dlmalloc spaces if we care about pauses since this can hold the space lock
+ // for a long period of time.
+ managed_reclaimed += malloc_space->Trim();
+ }
+ total_alloc_space_size += malloc_space->Size();
}
}
total_alloc_space_allocated = GetBytesAllocated() - large_object_space_->GetBytesAllocated();
@@ -919,15 +923,18 @@
// We never move things in the native heap, so we can finish the GC at this point.
FinishGC(self, collector::kGcTypeNone);
size_t native_reclaimed = 0;
+ // Only trim the native heap if we don't care about pauses.
+ if (!CareAboutPauseTimes()) {
#if defined(USE_DLMALLOC)
- // Trim the native heap.
- dlmalloc_trim(0);
- dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
+ // Trim the native heap.
+ dlmalloc_trim(0);
+ dlmalloc_inspect_all(DlmallocMadviseCallback, &native_reclaimed);
#elif defined(USE_JEMALLOC)
- // Jemalloc does it's own internal trimming.
+ // Jemalloc does its own internal trimming.
#else
- UNIMPLEMENTED(WARNING) << "Add trimming support";
+ UNIMPLEMENTED(WARNING) << "Add trimming support";
#endif
+ }
uint64_t end_ns = NanoTime();
VLOG(heap) << "Heap trim of managed (duration=" << PrettyDuration(gc_heap_end_ns - start_ns)
<< ", advised=" << PrettySize(managed_reclaimed) << ") and native (duration="
@@ -2693,17 +2700,14 @@
if (desired_collector_type_ == desired_collector_type) {
return;
}
- heap_transition_target_time_ = std::max(heap_transition_target_time_, NanoTime() + delta_time);
+ heap_transition_or_trim_target_time_ =
+ std::max(heap_transition_or_trim_target_time_, NanoTime() + delta_time);
desired_collector_type_ = desired_collector_type;
}
SignalHeapTrimDaemon(self);
}
void Heap::RequestHeapTrim() {
- // Request a heap trim only if we do not currently care about pause times.
- if (CareAboutPauseTimes()) {
- return;
- }
// GC completed and now we must decide whether to request a heap trim (advising pages back to the
// kernel) or not. Issuing a request will also cause trimming of the libc heap. As a trim scans
// a space it will hold its lock and can become a cause of jank.
@@ -2733,6 +2737,10 @@
return;
}
heap_trim_request_pending_ = true;
+ uint64_t current_time = NanoTime();
+ if (heap_transition_or_trim_target_time_ < current_time) {
+ heap_transition_or_trim_target_time_ = current_time + kHeapTrimWait;
+ }
}
// Notify the daemon thread which will actually do the heap trim.
SignalHeapTrimDaemon(self);
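Editor's sketch: the daemon loop above now waits on a single deadline shared by heap transitions and trims, and the request paths only ever push that deadline later. A standalone illustration of the pattern using std::chrono instead of ART's NanoTime follows; it is not ART code and the names are invented.

// Minimal sketch (not ART code) of a push-only deadline shared by several requests.
#include <algorithm>
#include <chrono>
#include <thread>

using Clock = std::chrono::steady_clock;

struct TrimDaemonSketch {
  Clock::time_point target = Clock::now();

  // Request work no earlier than delta from now; never moves the deadline earlier.
  void PushDeadline(std::chrono::nanoseconds delta) {
    target = std::max(target, Clock::now() + delta);
  }

  void WaitForDeadline() {
    while (true) {
      Clock::time_point now = Clock::now();
      if (now >= target) {
        break;  // Deadline reached: perform the transition and/or trim.
      }
      std::this_thread::sleep_for(target - now);
    }
  }
};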
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 6d70a38..c9ea03e 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -769,8 +769,8 @@
Mutex* heap_trim_request_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
// When we want to perform the next heap trim (nano seconds).
uint64_t last_trim_time_ GUARDED_BY(heap_trim_request_lock_);
- // When we want to perform the next heap transition (nano seconds).
- uint64_t heap_transition_target_time_ GUARDED_BY(heap_trim_request_lock_);
+ // When we want to perform the next heap transition (nano seconds) or heap trim.
+ uint64_t heap_transition_or_trim_target_time_ GUARDED_BY(heap_trim_request_lock_);
// If we have a heap trim request pending.
bool heap_trim_request_pending_ GUARDED_BY(heap_trim_request_lock_);
@@ -981,6 +981,7 @@
friend class VerifyReferenceCardVisitor;
friend class VerifyReferenceVisitor;
friend class VerifyObjectVisitor;
+ friend class ScopedHeapFill;
friend class ScopedHeapLock;
friend class space::SpaceTest;
@@ -997,6 +998,25 @@
DISALLOW_IMPLICIT_CONSTRUCTORS(Heap);
};
+// ScopedHeapFill changes the bytes allocated counter to be equal to the growth limit. This
+// causes the next allocation to perform a GC and possibly an OOM. It can be used to ensure that a
+// GC happens in specific methods such as ThrowIllegalMonitorStateExceptionF in Monitor::Wait.
+class ScopedHeapFill {
+ public:
+ explicit ScopedHeapFill(Heap* heap)
+ : heap_(heap),
+ delta_(heap_->GetMaxMemory() - heap_->GetBytesAllocated()) {
+ heap_->num_bytes_allocated_.FetchAndAddSequentiallyConsistent(delta_);
+ }
+ ~ScopedHeapFill() {
+ heap_->num_bytes_allocated_.FetchAndSubSequentiallyConsistent(delta_);
+ }
+
+ private:
+ Heap* const heap_;
+ const int64_t delta_;
+};
+
} // namespace gc
} // namespace art
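Editor's sketch: ScopedHeapFill above is a plain RAII counter adjustment, inflating num_bytes_allocated_ to the growth limit on construction and subtracting the same delta on destruction. The following standalone sketch shows the same shape against a stand-in heap type; it is not ART code and all names are invented.

// Minimal sketch (not ART code): temporarily fill an allocation counter to the limit
// so the next allocation is forced down the slow path, then restore it on scope exit.
#include <atomic>
#include <cstdint>

struct FakeHeap {
  std::atomic<int64_t> bytes_allocated{0};
  int64_t growth_limit = 1 << 20;
};

class ScopedFillSketch {
 public:
  explicit ScopedFillSketch(FakeHeap* heap)
      : heap_(heap), delta_(heap->growth_limit - heap->bytes_allocated.load()) {
    heap_->bytes_allocated.fetch_add(delta_);
  }
  ~ScopedFillSketch() { heap_->bytes_allocated.fetch_sub(delta_); }

 private:
  FakeHeap* const heap_;
  const int64_t delta_;
};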
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 71c295e..ee3c979 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -41,11 +41,12 @@
size_t* usable_size) {
Locks::mutator_lock_->AssertExclusiveHeld(self);
num_bytes = RoundUp(num_bytes, kAlignment);
- if (end_ + num_bytes > growth_end_) {
+ byte* end = end_.LoadRelaxed();
+ if (end + num_bytes > growth_end_) {
return nullptr;
}
- mirror::Object* obj = reinterpret_cast<mirror::Object*>(end_);
- end_ += num_bytes;
+ mirror::Object* obj = reinterpret_cast<mirror::Object*>(end);
+ end_.StoreRelaxed(end + num_bytes);
*bytes_allocated = num_bytes;
// Use the CAS free versions as an optimization.
objects_allocated_.StoreRelaxed(objects_allocated_.LoadRelaxed() + 1);
@@ -61,15 +62,13 @@
byte* old_end;
byte* new_end;
do {
- old_end = end_;
+ old_end = end_.LoadRelaxed();
new_end = old_end + num_bytes;
// If there is no more room in the region, we are out of memory.
if (UNLIKELY(new_end > growth_end_)) {
return nullptr;
}
- } while (!__sync_bool_compare_and_swap(reinterpret_cast<volatile intptr_t*>(&end_),
- reinterpret_cast<intptr_t>(old_end),
- reinterpret_cast<intptr_t>(new_end)));
+ } while (!end_.CompareExchangeWeakSequentiallyConsistent(old_end, new_end));
return reinterpret_cast<mirror::Object*>(old_end);
}
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 41a0458..5123e47 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -293,7 +293,7 @@
madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
live_bitmap_->Clear();
mark_bitmap_->Clear();
- end_ = Begin() + starting_size_;
+ SetEnd(Begin() + starting_size_);
mspace_ = CreateMspace(mem_map_->Begin(), starting_size_, initial_size_);
SetFootprintLimit(footprint_limit);
}
diff --git a/runtime/gc/space/malloc_space.cc b/runtime/gc/space/malloc_space.cc
index 4d74f3c..27f92b5 100644
--- a/runtime/gc/space/malloc_space.cc
+++ b/runtime/gc/space/malloc_space.cc
@@ -123,13 +123,13 @@
growth_limit = RoundUp(growth_limit, kPageSize);
growth_limit_ = growth_limit;
if (Size() > growth_limit_) {
- end_ = begin_ + growth_limit;
+ SetEnd(begin_ + growth_limit);
}
}
void* MallocSpace::MoreCore(intptr_t increment) {
CheckMoreCoreForPrecondition();
- byte* original_end = end_;
+ byte* original_end = End();
if (increment != 0) {
VLOG(heap) << "MallocSpace::MoreCore " << PrettySize(increment);
byte* new_end = original_end + increment;
@@ -151,8 +151,8 @@
CHECK_MEMORY_CALL(madvise, (new_end, size, MADV_DONTNEED), GetName());
CHECK_MEMORY_CALL(mprotect, (new_end, size, PROT_NONE), GetName());
}
- // Update end_
- end_ = new_end;
+ // Update end_.
+ SetEnd(new_end);
}
return original_end;
}
@@ -163,11 +163,11 @@
// alloc space so that we won't mix thread local runs from different
// alloc spaces.
RevokeAllThreadLocalBuffers();
- end_ = reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(end_), kPageSize));
+ SetEnd(reinterpret_cast<byte*>(RoundUp(reinterpret_cast<uintptr_t>(End()), kPageSize)));
DCHECK(IsAligned<accounting::CardTable::kCardSize>(begin_));
- DCHECK(IsAligned<accounting::CardTable::kCardSize>(end_));
+ DCHECK(IsAligned<accounting::CardTable::kCardSize>(End()));
DCHECK(IsAligned<kPageSize>(begin_));
- DCHECK(IsAligned<kPageSize>(end_));
+ DCHECK(IsAligned<kPageSize>(End()));
size_t size = RoundUp(Size(), kPageSize);
// Trimming the heap should be done by the caller since we may have invalidated the accounting
// stored in between objects.
@@ -175,7 +175,7 @@
const size_t growth_limit = growth_limit_ - size;
const size_t capacity = Capacity() - size;
VLOG(heap) << "Begin " << reinterpret_cast<const void*>(begin_) << "\n"
- << "End " << reinterpret_cast<const void*>(end_) << "\n"
+ << "End " << reinterpret_cast<const void*>(End()) << "\n"
<< "Size " << size << "\n"
<< "GrowthLimit " << growth_limit_ << "\n"
<< "Capacity " << Capacity();
@@ -188,16 +188,17 @@
VLOG(heap) << "Capacity " << PrettySize(capacity);
// Remap the tail.
std::string error_msg;
- std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(end_, alloc_space_name,
- PROT_READ | PROT_WRITE, &error_msg));
+ std::unique_ptr<MemMap> mem_map(GetMemMap()->RemapAtEnd(End(), alloc_space_name,
+ PROT_READ | PROT_WRITE, &error_msg));
CHECK(mem_map.get() != nullptr) << error_msg;
- void* allocator = CreateAllocator(end_, starting_size_, initial_size_, capacity, low_memory_mode);
+ void* allocator = CreateAllocator(End(), starting_size_, initial_size_, capacity,
+ low_memory_mode);
// Protect memory beyond the initial size.
byte* end = mem_map->Begin() + starting_size_;
if (capacity > initial_size_) {
CHECK_MEMORY_CALL(mprotect, (end, capacity - initial_size_, PROT_NONE), alloc_space_name);
}
- *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, end_, end,
+ *out_malloc_space = CreateInstance(alloc_space_name, mem_map.release(), allocator, End(), end,
limit_, growth_limit, CanMoveObjects());
SetLimit(End());
live_bitmap_->SetHeapLimit(reinterpret_cast<uintptr_t>(End()));
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index a1511e7..5738d47 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -349,7 +349,7 @@
madvise(GetMemMap()->Begin(), GetMemMap()->Size(), MADV_DONTNEED);
live_bitmap_->Clear();
mark_bitmap_->Clear();
- end_ = begin_ + starting_size_;
+ SetEnd(begin_ + starting_size_);
delete rosalloc_;
rosalloc_ = CreateRosAlloc(mem_map_->Begin(), starting_size_, initial_size_, Capacity(),
low_memory_mode_);
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index 8444a70..fff4df1 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
+#include "atomic.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "gc/accounting/space_bitmap.h"
@@ -249,7 +250,7 @@
// Current address at which the space ends, which may vary as the space is filled.
byte* End() const {
- return end_;
+ return end_.LoadRelaxed();
}
// The end of the address range covered by the space.
@@ -260,7 +261,7 @@
// Change the end of the space. Be careful with use since changing the end of a space to an
// invalid value may break the GC.
void SetEnd(byte* end) {
- end_ = end;
+ end_.StoreRelaxed(end);
}
void SetLimit(byte* limit) {
@@ -307,7 +308,7 @@
byte* begin_;
// Current end of the space.
- byte* volatile end_;
+ Atomic<byte*> end_;
// Limit of the space.
byte* limit_;
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index 8f5da83..f459b59 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -137,7 +137,8 @@
new_quick_code = GetQuickResolutionTrampoline(class_linker);
}
} else { // !uninstall
- if ((interpreter_stubs_installed_ || IsDeoptimized(method)) && !method->IsNative()) {
+ if ((interpreter_stubs_installed_ || forced_interpret_only_ || IsDeoptimized(method)) &&
+ !method->IsNative()) {
new_portable_code = GetPortableToInterpreterBridge();
new_quick_code = GetQuickToInterpreterBridge();
} else {
@@ -150,7 +151,9 @@
new_quick_code = class_linker->GetQuickOatCodeFor(method);
DCHECK(new_quick_code != GetQuickToInterpreterBridgeTrampoline(class_linker));
if (entry_exit_stubs_installed_ && new_quick_code != GetQuickToInterpreterBridge()) {
- DCHECK(new_portable_code != GetPortableToInterpreterBridge());
+ // TODO: portable to quick bridge. Bug: 8196384. We cannot enable the check below as long
+ // as GetPortableToQuickBridge() == GetPortableToInterpreterBridge().
+ // DCHECK(new_portable_code != GetPortableToInterpreterBridge());
new_portable_code = GetPortableToInterpreterBridge();
new_quick_code = GetQuickInstrumentationEntryPoint();
}
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index c7fb884..9f04b90 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -772,8 +772,13 @@
// shadow_frame.GetMethod()->GetDeclaringClass()->GetClassLoader();
Class* found = Runtime::Current()->GetClassLinker()->FindClass(
self, descriptor.c_str(), NullHandle<mirror::ClassLoader>());
- CHECK(found != NULL) << "Class.forName failed in un-started runtime for class: "
- << PrettyDescriptor(descriptor);
+ if (found == NULL) {
+ if (!self->IsExceptionPending()) {
+ AbortTransaction(self, "Class.forName failed in un-started runtime for class: %s",
+ PrettyDescriptor(descriptor).c_str());
+ }
+ return;
+ }
result->SetL(found);
} else if (name == "java.lang.Class java.lang.Void.lookupType()") {
result->SetL(Runtime::Current()->GetClassLinker()->FindPrimitiveClass('V'));
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index eb62a69..c3ec38d 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -90,7 +90,33 @@
hash_code_(hash_code),
locking_method_(NULL),
locking_dex_pc_(0),
- monitor_id_(MonitorPool::CreateMonitorId(self, this)) {
+ monitor_id_(MonitorPool::ComputeMonitorId(this, self)) {
+#ifdef __LP64__
+ DCHECK(false) << "Should not be reached in 64b";
+ next_free_ = nullptr;
+#endif
+ // We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
+ // with the owner unlocking the thin-lock.
+ CHECK(owner == nullptr || owner == self || owner->IsSuspended());
+ // The identity hash code is set for the life time of the monitor.
+}
+
+Monitor::Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code,
+ MonitorId id)
+ : monitor_lock_("a monitor lock", kMonitorLock),
+ monitor_contenders_("monitor contenders", monitor_lock_),
+ num_waiters_(0),
+ owner_(owner),
+ lock_count_(0),
+ obj_(obj),
+ wait_set_(NULL),
+ hash_code_(hash_code),
+ locking_method_(NULL),
+ locking_dex_pc_(0),
+ monitor_id_(id) {
+#ifdef __LP64__
+ next_free_ = nullptr;
+#endif
// We should only inflate a lock if the owner is ourselves or suspended. This avoids a race
// with the owner unlocking the thin-lock.
CHECK(owner == nullptr || owner == self || owner->IsSuspended());
@@ -146,7 +172,6 @@
}
Monitor::~Monitor() {
- MonitorPool::ReleaseMonitorId(monitor_id_);
// Deflated monitors have a null object.
}
@@ -621,20 +646,23 @@
* inflating the lock and so the caller should read the monitor following the call.
*/
void Monitor::Inflate(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code) {
- DCHECK(self != NULL);
- DCHECK(obj != NULL);
+ DCHECK(self != nullptr);
+ DCHECK(obj != nullptr);
// Allocate and acquire a new monitor.
- std::unique_ptr<Monitor> m(new Monitor(self, owner, obj, hash_code));
+ Monitor* m = MonitorPool::CreateMonitor(self, owner, obj, hash_code);
+ DCHECK(m != nullptr);
if (m->Install(self)) {
if (owner != nullptr) {
VLOG(monitor) << "monitor: thread" << owner->GetThreadId()
- << " created monitor " << m.get() << " for object " << obj;
+ << " created monitor " << m << " for object " << obj;
} else {
VLOG(monitor) << "monitor: Inflate with hashcode " << hash_code
- << " created monitor " << m.get() << " for object " << obj;
+ << " created monitor " << m << " for object " << obj;
}
- Runtime::Current()->GetMonitorList()->Add(m.release());
+ Runtime::Current()->GetMonitorList()->Add(m);
CHECK_EQ(obj->GetLockWord(true).GetState(), LockWord::kFatLocked);
+ } else {
+ MonitorPool::ReleaseMonitor(self, m);
}
}
@@ -1071,8 +1099,12 @@
}
MonitorList::~MonitorList() {
- MutexLock mu(Thread::Current(), monitor_list_lock_);
- STLDeleteElements(&list_);
+ Thread* self = Thread::Current();
+ MutexLock mu(self, monitor_list_lock_);
+ // Release all monitors to the pool.
+ // TODO: Is it an invariant that *all* open monitors are in the list? Then we could
+ // clear faster in the pool.
+ MonitorPool::ReleaseMonitors(self, &list_);
}
void MonitorList::DisallowNewMonitors() {
@@ -1097,7 +1129,8 @@
}
void MonitorList::SweepMonitorList(IsMarkedCallback* callback, void* arg) {
- MutexLock mu(Thread::Current(), monitor_list_lock_);
+ Thread* self = Thread::Current();
+ MutexLock mu(self, monitor_list_lock_);
for (auto it = list_.begin(); it != list_.end(); ) {
Monitor* m = *it;
// Disable the read barrier in GetObject() as this is called by GC.
@@ -1107,7 +1140,7 @@
if (new_obj == nullptr) {
VLOG(monitor) << "freeing monitor " << m << " belonging to unmarked object "
<< obj;
- delete m;
+ MonitorPool::ReleaseMonitor(self, m);
it = list_.erase(it);
} else {
m->SetObject(new_obj);
diff --git a/runtime/monitor.h b/runtime/monitor.h
index d7552a3..0d0ad0b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -124,7 +124,9 @@
private:
explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
- SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
+ explicit Monitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code,
+ MonitorId id) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
// Install the monitor into its object, may fail if another thread installs a different monitor
// first.
@@ -212,8 +214,14 @@
// The denser encoded version of this monitor as stored in the lock word.
MonitorId monitor_id_;
+#ifdef __LP64__
+ // Free list for monitor pool.
+ Monitor* next_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+#endif
+
friend class MonitorInfo;
friend class MonitorList;
+ friend class MonitorPool;
friend class mirror::Object;
DISALLOW_COPY_AND_ASSIGN(Monitor);
};
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index eb7525a..440a6be 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -23,36 +23,118 @@
namespace art {
-MonitorPool::MonitorPool() : allocated_ids_lock_("allocated monitor ids lock",
- LockLevel::kMonitorPoolLock) {
+namespace mirror {
+ class Object;
+} // namespace mirror
+
+MonitorPool::MonitorPool()
+ : num_chunks_(0), capacity_(0), first_free_(nullptr) {
+ AllocateChunk(); // Get our first chunk.
}
-Monitor* MonitorPool::LookupMonitorFromTable(MonitorId mon_id) {
- ReaderMutexLock mu(Thread::Current(), allocated_ids_lock_);
- return table_.Get(mon_id);
-}
+// Assumes locks are held appropriately when necessary.
+// We do not need a lock in the constructor, but we need one in CreateMonitorInPool.
+void MonitorPool::AllocateChunk() {
+ DCHECK(first_free_ == nullptr);
-MonitorId MonitorPool::AllocMonitorIdFromTable(Thread* self, Monitor* mon) {
- WriterMutexLock mu(self, allocated_ids_lock_);
- for (size_t i = 0; i < allocated_ids_.size(); ++i) {
- if (!allocated_ids_[i]) {
- allocated_ids_.set(i);
- MonitorId mon_id = i + 1; // Zero is reserved to mean "invalid".
- table_.Put(mon_id, mon);
- return mon_id;
+ // Do we need to resize?
+ if (num_chunks_ == capacity_) {
+ if (capacity_ == 0U) {
+ // Initialization.
+ capacity_ = kInitialChunkStorage;
+ uintptr_t* new_backing = new uintptr_t[capacity_];
+ monitor_chunks_.StoreRelaxed(new_backing);
+ } else {
+ size_t new_capacity = 2 * capacity_;
+ uintptr_t* new_backing = new uintptr_t[new_capacity];
+ uintptr_t* old_backing = monitor_chunks_.LoadRelaxed();
+ memcpy(new_backing, old_backing, sizeof(uintptr_t) * capacity_);
+ monitor_chunks_.StoreRelaxed(new_backing);
+ capacity_ = new_capacity;
+ old_chunk_arrays_.push_back(old_backing);
+ LOG(INFO) << "Resizing to capacity " << capacity_;
}
}
- LOG(FATAL) << "Out of internal monitor ids";
- return 0;
+
+ // Allocate the chunk.
+ void* chunk = malloc(kChunkSize);
+ // Check we allocated memory.
+ CHECK_NE(reinterpret_cast<uintptr_t>(nullptr), reinterpret_cast<uintptr_t>(chunk));
+ // Check it is aligned as we need it.
+ CHECK_EQ(0U, reinterpret_cast<uintptr_t>(chunk) % kMonitorAlignment);
+
+ // Add the chunk.
+ *(monitor_chunks_.LoadRelaxed()+num_chunks_) = reinterpret_cast<uintptr_t>(chunk);
+ num_chunks_++;
+
+ // Set up the free list
+ Monitor* last = reinterpret_cast<Monitor*>(reinterpret_cast<uintptr_t>(chunk) +
+ (kChunkCapacity - 1) * kAlignedMonitorSize);
+ last->next_free_ = nullptr;
+ // Eagerly compute id.
+ last->monitor_id_ = OffsetToMonitorId((num_chunks_ - 1) * kChunkSize +
+ (kChunkCapacity - 1) * kAlignedMonitorSize);
+ for (size_t i = 0; i < kChunkCapacity - 1; ++i) {
+ Monitor* before = reinterpret_cast<Monitor*>(reinterpret_cast<uintptr_t>(last) -
+ kAlignedMonitorSize);
+ before->next_free_ = last;
+ // Derive monitor_id from last.
+ before->monitor_id_ = OffsetToMonitorId(MonitorIdToOffset(last->monitor_id_) -
+ kAlignedMonitorSize);
+
+ last = before;
+ }
+ DCHECK(last == reinterpret_cast<Monitor*>(chunk));
+ first_free_ = last;
}
-void MonitorPool::ReleaseMonitorIdFromTable(MonitorId mon_id) {
- WriterMutexLock mu(Thread::Current(), allocated_ids_lock_);
- DCHECK(table_.Get(mon_id) != nullptr);
- table_.erase(mon_id);
- --mon_id; // Zero is reserved to mean "invalid".
- DCHECK(allocated_ids_[mon_id]) << mon_id;
- allocated_ids_.reset(mon_id);
+Monitor* MonitorPool::CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj,
+ int32_t hash_code)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+ // We are going to allocate, so acquire the lock.
+ MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+
+ // Enough space, or need to resize?
+ if (first_free_ == nullptr) {
+ LOG(INFO) << "Allocating a new chunk.";
+ AllocateChunk();
+ }
+
+ Monitor* mon_uninitialized = first_free_;
+ first_free_ = first_free_->next_free_;
+
+ // Pull out the id which was preinitialized.
+ MonitorId id = mon_uninitialized->monitor_id_;
+
+ // Initialize it.
+ Monitor* monitor = new(mon_uninitialized) Monitor(self, owner, obj, hash_code, id);
+
+ return monitor;
+}
+
+void MonitorPool::ReleaseMonitorToPool(Thread* self, Monitor* monitor) {
+ // Might be racy with allocation, so acquire lock.
+ MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+
+ // Keep the monitor id. Don't trust it's not cleared.
+ MonitorId id = monitor->monitor_id_;
+
+ // Call the destructor.
+ // TODO: Exception safety?
+ monitor->~Monitor();
+
+ // Add to the head of the free list.
+ monitor->next_free_ = first_free_;
+ first_free_ = monitor;
+
+ // Rewrite monitor id.
+ monitor->monitor_id_ = id;
+}
+
+void MonitorPool::ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors) {
+ for (Monitor* mon : *monitors) {
+ ReleaseMonitorToPool(self, mon);
+ }
}
} // namespace art
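Editor's sketch: CreateMonitorInPool/ReleaseMonitorToPool above implement a fixed-size slot allocator: each chunk is cut into aligned monitor-sized slots, the slots are threaded into an intrusive free list, and objects are constructed in place. The standalone sketch below shows only that allocator shape; it is not ART code, omits locking, ids, and chunk growth, and assumes slot_size is at least sizeof(Slot) and a multiple of the required alignment.

// Minimal sketch (not ART code) of a single-chunk intrusive free-list pool.
#include <cstddef>
#include <cstdlib>

struct Slot { Slot* next_free; };

class FixedPoolSketch {
 public:
  FixedPoolSketch(size_t slot_size, size_t slots) : slot_size_(slot_size) {
    chunk_ = static_cast<char*>(std::malloc(slot_size_ * slots));
    for (size_t i = 0; i < slots; ++i) {
      // Thread every slot onto the free list.
      Slot* s = reinterpret_cast<Slot*>(chunk_ + i * slot_size_);
      s->next_free = free_list_;
      free_list_ = s;
    }
  }
  ~FixedPoolSketch() { std::free(chunk_); }

  void* Take() {
    Slot* s = free_list_;
    if (s != nullptr) {
      free_list_ = s->next_free;
    }
    return s;  // nullptr means the chunk is exhausted.
  }

  void Give(void* p) {
    Slot* s = static_cast<Slot*>(p);
    s->next_free = free_list_;
    free_list_ = s;
  }

 private:
  size_t slot_size_;
  char* chunk_ = nullptr;
  Slot* free_list_ = nullptr;
};

A caller takes a slot, placement-news the object into it, and later runs the destructor and gives the slot back, which is what CreateMonitorInPool and ReleaseMonitorToPool do with Monitor (while preserving the precomputed monitor id across the destructor call).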
diff --git a/runtime/monitor_pool.h b/runtime/monitor_pool.h
index 32e1553..5bc28f1 100644
--- a/runtime/monitor_pool.h
+++ b/runtime/monitor_pool.h
@@ -20,11 +20,11 @@
#include "monitor.h"
#ifdef __LP64__
-#include <bitset>
#include <stdint.h>
-
+#include "atomic.h"
#include "runtime.h"
-#include "safe_map.h"
+#else
+#include "base/stl_util.h" // STLDeleteElements
#endif
namespace art {
@@ -41,11 +41,36 @@
#endif
}
+ static Monitor* CreateMonitor(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#ifndef __LP64__
+ return new Monitor(self, owner, obj, hash_code);
+#else
+ return GetMonitorPool()->CreateMonitorInPool(self, owner, obj, hash_code);
+#endif
+ }
+
+ static void ReleaseMonitor(Thread* self, Monitor* monitor) {
+#ifndef __LP64__
+ delete monitor;
+#else
+ GetMonitorPool()->ReleaseMonitorToPool(self, monitor);
+#endif
+ }
+
+ static void ReleaseMonitors(Thread* self, std::list<Monitor*>* monitors) {
+#ifndef __LP64__
+ STLDeleteElements(monitors);
+#else
+ GetMonitorPool()->ReleaseMonitorsToPool(self, monitors);
+#endif
+ }
+
static Monitor* MonitorFromMonitorId(MonitorId mon_id) {
#ifndef __LP64__
return reinterpret_cast<Monitor*>(mon_id << 3);
#else
- return Runtime::Current()->GetMonitorPool()->LookupMonitorFromTable(mon_id);
+ return GetMonitorPool()->LookupMonitor(mon_id);
#endif
}
@@ -57,39 +82,98 @@
#endif
}
- static MonitorId CreateMonitorId(Thread* self, Monitor* mon) {
+ static MonitorId ComputeMonitorId(Monitor* mon, Thread* self) {
#ifndef __LP64__
- UNUSED(self);
return MonitorIdFromMonitor(mon);
#else
- return Runtime::Current()->GetMonitorPool()->AllocMonitorIdFromTable(self, mon);
+ return GetMonitorPool()->ComputeMonitorIdInPool(mon, self);
#endif
}
- static void ReleaseMonitorId(MonitorId mon_id) {
+ static MonitorPool* GetMonitorPool() {
#ifndef __LP64__
- UNUSED(mon_id);
+ return nullptr;
#else
- Runtime::Current()->GetMonitorPool()->ReleaseMonitorIdFromTable(mon_id);
+ return Runtime::Current()->GetMonitorPool();
#endif
}
private:
#ifdef __LP64__
- MonitorPool();
+ // When we create a monitor pool, threads have not been initialized yet, so ignore thread-safety
+ // analysis.
+ MonitorPool() NO_THREAD_SAFETY_ANALYSIS;
- Monitor* LookupMonitorFromTable(MonitorId mon_id);
+ void AllocateChunk() EXCLUSIVE_LOCKS_REQUIRED(Locks::allocated_monitor_ids_lock_);
- MonitorId LookupMonitorIdFromTable(Monitor* mon);
+ Monitor* CreateMonitorInPool(Thread* self, Thread* owner, mirror::Object* obj, int32_t hash_code)
+ SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
- MonitorId AllocMonitorIdFromTable(Thread* self, Monitor* mon);
+ void ReleaseMonitorToPool(Thread* self, Monitor* monitor);
+ void ReleaseMonitorsToPool(Thread* self, std::list<Monitor*>* monitors);
- void ReleaseMonitorIdFromTable(MonitorId mon_id);
+ // Note: This is safe as we do not ever move chunks.
+ Monitor* LookupMonitor(MonitorId mon_id) {
+ size_t offset = MonitorIdToOffset(mon_id);
+ size_t index = offset / kChunkSize;
+ size_t offset_in_chunk = offset % kChunkSize;
+ uintptr_t base = *(monitor_chunks_.LoadRelaxed()+index);
+ return reinterpret_cast<Monitor*>(base + offset_in_chunk);
+ }
- ReaderWriterMutex allocated_ids_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
- static constexpr uint32_t kMaxMonitorId = 0xFFFF;
- std::bitset<kMaxMonitorId> allocated_ids_ GUARDED_BY(allocated_ids_lock_);
- SafeMap<MonitorId, Monitor*> table_ GUARDED_BY(allocated_ids_lock_);
+ static bool IsInChunk(uintptr_t base_addr, Monitor* mon) {
+ uintptr_t mon_ptr = reinterpret_cast<uintptr_t>(mon);
+ return base_addr <= mon_ptr && (mon_ptr - base_addr < kChunkSize);
+ }
+
+ // Note: This is safe as we do not ever move chunks.
+ MonitorId ComputeMonitorIdInPool(Monitor* mon, Thread* self) {
+ MutexLock mu(self, *Locks::allocated_monitor_ids_lock_);
+ for (size_t index = 0; index < num_chunks_; ++index) {
+ uintptr_t chunk_addr = *(monitor_chunks_.LoadRelaxed() + index);
+ if (IsInChunk(chunk_addr, mon)) {
+ return OffsetToMonitorId(reinterpret_cast<uintptr_t>(mon) - chunk_addr + index * kChunkSize);
+ }
+ }
+ LOG(FATAL) << "Did not find chunk that contains monitor.";
+ return 0;
+ }
+
+ static size_t MonitorIdToOffset(MonitorId id) {
+ return id << 3;
+ }
+
+ static MonitorId OffsetToMonitorId(size_t offset) {
+ return static_cast<MonitorId>(offset >> 3);
+ }
+
+ // TODO: There are assumptions in the code that monitor addresses are 8B aligned (>>3).
+ static constexpr size_t kMonitorAlignment = 8;
+ // Size of a monitor, rounded up to a multiple of alignment.
+ static constexpr size_t kAlignedMonitorSize = (sizeof(Monitor) + kMonitorAlignment - 1) &
+ -kMonitorAlignment;
+ // As close to a page as we can get seems a good start.
+ static constexpr size_t kChunkCapacity = kPageSize / kAlignedMonitorSize;
+ // Chunk size that is referenced in the id. We can collapse this to the actually used storage
+ // in a chunk, i.e., kChunkCapacity * kAlignedMonitorSize, but that would require real division.
+ static constexpr size_t kChunkSize = kPageSize;
+ // The number of initial chunks storable in monitor_chunks_. The number is large enough to make
+ // resizing unlikely, but small enough to not waste too much memory.
+ static constexpr size_t kInitialChunkStorage = 8U;
+
+ // List of memory chunks. Each chunk is kChunkSize.
+ Atomic<uintptr_t*> monitor_chunks_;
+ // Number of chunks stored.
+ size_t num_chunks_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+ // Number of chunks storable.
+ size_t capacity_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+
+ // To avoid race issues when resizing, we keep all the previous arrays.
+ std::vector<uintptr_t*> old_chunk_arrays_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
+
+ // Start of free list of monitors.
+ // Note: these point to the right memory regions, but do *not* denote initialized objects.
+ Monitor* first_free_ GUARDED_BY(Locks::allocated_monitor_ids_lock_);
#endif
};
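Editor's sketch: MonitorIdToOffset/OffsetToMonitorId and LookupMonitor above turn a monitor id into pure pointer arithmetic over the chunk array, relying on the 8-byte slot alignment, so no lookup table is needed. A standalone illustration of that arithmetic follows; it is not ART code and the names are invented.

// Minimal sketch (not ART code): ids encode byte offsets over 8-byte-aligned slots.
#include <cstddef>
#include <cstdint>

constexpr size_t kChunkSizeSketch = 4096;

inline size_t IdToOffset(uint32_t id) { return static_cast<size_t>(id) << 3; }
inline uint32_t OffsetToId(size_t offset) { return static_cast<uint32_t>(offset >> 3); }

inline void* Lookup(const uintptr_t* chunks, uint32_t id) {
  size_t offset = IdToOffset(id);
  size_t index = offset / kChunkSizeSketch;            // which chunk
  size_t offset_in_chunk = offset % kChunkSizeSketch;  // where inside the chunk
  return reinterpret_cast<void*>(chunks[index] + offset_in_chunk);
}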
diff --git a/runtime/monitor_pool_test.cc b/runtime/monitor_pool_test.cc
new file mode 100644
index 0000000..cddc245
--- /dev/null
+++ b/runtime/monitor_pool_test.cc
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "monitor_pool.h"
+
+#include "common_runtime_test.h"
+
+namespace art {
+
+class MonitorPoolTest : public CommonRuntimeTest {};
+
+class RandGen {
+ public:
+ explicit RandGen(uint32_t seed) : val_(seed) {}
+
+ uint32_t next() {
+ val_ = val_ * 48271 % 2147483647 + 13;
+ return val_;
+ }
+
+ uint32_t val_;
+};
+
+static void VerifyMonitor(Monitor* mon, Thread* self) {
+ // Check whether the monitor id is correct.
+ EXPECT_EQ(MonitorPool::MonitorIdFromMonitor(mon), mon->GetMonitorId());
+ // Check whether the monitor id agrees with the computation.
+ EXPECT_EQ(MonitorPool::ComputeMonitorId(mon, self), mon->GetMonitorId());
+ // Check whether we can use the monitor ID to get the monitor.
+ EXPECT_EQ(mon, MonitorPool::MonitorFromMonitorId(mon->GetMonitorId()));
+}
+
+TEST_F(MonitorPoolTest, MonitorPoolTest) {
+ std::vector<Monitor*> monitors;
+ RandGen r(0x1234);
+
+ // 1) Create and release monitors without increasing the storage.
+
+ // Number of max alive monitors before resize.
+ // Note: for correct testing, make sure this corresponds to the monitor pool's initial size.
+ const size_t kMaxUsage = 28;
+
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+
+ // Allocate and release monitors.
+ for (size_t i = 0; i < 1000 ; i++) {
+ bool alloc;
+ if (monitors.size() == 0) {
+ alloc = true;
+ } else if (monitors.size() == kMaxUsage) {
+ alloc = false;
+ } else {
+ // Random decision.
+ alloc = r.next() % 2 == 0;
+ }
+
+ if (alloc) {
+ Monitor* mon = MonitorPool::CreateMonitor(self, self, nullptr, static_cast<int32_t>(i));
+ monitors.push_back(mon);
+
+ VerifyMonitor(mon, self);
+ } else {
+ // Release a random monitor.
+ size_t index = r.next() % monitors.size();
+ Monitor* mon = monitors[index];
+ monitors.erase(monitors.begin() + index);
+
+ // Recheck the monitor.
+ VerifyMonitor(mon, self);
+
+ MonitorPool::ReleaseMonitor(self, mon);
+ }
+ }
+
+ // Loop some time.
+
+ for (size_t i = 0; i < 10; ++i) {
+ // 2.1) Create enough monitors to require new chunks.
+ size_t target_size = monitors.size() + 2*kMaxUsage;
+ while (monitors.size() < target_size) {
+ Monitor* mon = MonitorPool::CreateMonitor(self, self, nullptr,
+ static_cast<int32_t>(-monitors.size()));
+ monitors.push_back(mon);
+
+ VerifyMonitor(mon, self);
+ }
+
+ // 2.2) Verify all monitors.
+ for (Monitor* mon : monitors) {
+ VerifyMonitor(mon, self);
+ }
+
+ // 2.3) Release a number of monitors randomly.
+ for (size_t j = 0; j < kMaxUsage; j++) {
+ // Release a random monitor.
+ size_t index = r.next() % monitors.size();
+ Monitor* mon = monitors[index];
+ monitors.erase(monitors.begin() + index);
+
+ MonitorPool::ReleaseMonitor(self, mon);
+ }
+ }
+
+ // Check and release all remaining monitors.
+ for (Monitor* mon : monitors) {
+ VerifyMonitor(mon, self);
+ MonitorPool::ReleaseMonitor(self, mon);
+ }
+}
+
+} // namespace art
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 86db893..bae67f2 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -85,6 +85,7 @@
case kWaitingForJniOnLoad: return kJavaWaiting;
case kWaitingForSignalCatcherOutput: return kJavaWaiting;
case kWaitingInMainSignalCatcherLoop: return kJavaWaiting;
+ case kWaitingForMethodTracingStart: return kJavaWaiting;
case kSuspended: return kJavaRunnable;
// Don't add a 'default' here so the compiler can spot incompatible enum changes.
}
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 3b14aaa..efa205e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -147,6 +147,13 @@
}
Runtime::~Runtime() {
+ if (method_trace_ && Thread::Current() == nullptr) {
+ // We need a current thread to shut down method tracing: re-attach it now.
+ JNIEnv* unused_env;
+ if (GetJavaVM()->AttachCurrentThread(&unused_env, nullptr) != JNI_OK) {
+ LOG(ERROR) << "Could not attach current thread before runtime shutdown.";
+ }
+ }
if (dump_gc_performance_on_shutdown_) {
// This can't be called from the Heap destructor below because it
// could call RosAlloc::InspectAll() which needs the thread_list
@@ -681,6 +688,7 @@
Trace::SetDefaultClockSource(options->profile_clock_source_);
if (options->method_trace_) {
+ ScopedThreadStateChange tsc(self, kWaitingForMethodTracingStart);
Trace::Start(options->method_trace_file_.c_str(), -1, options->method_trace_file_size_, 0,
false, false, 0);
}
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index b1180bd..38f1307 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -21,8 +21,6 @@
#include <pthread.h>
-#include "cutils/atomic-inline.h"
-
#include "base/casts.h"
#include "base/mutex-inl.h"
#include "gc/heap.h"
@@ -99,9 +97,12 @@
DCHECK_EQ((old_state_and_flags.as_struct.flags & kCheckpointRequest), 0);
new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags;
new_state_and_flags.as_struct.state = new_state;
- int status = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
- &tls32_.state_and_flags.as_int);
- if (LIKELY(status == 0)) {
+
+ // CAS the value without a memory ordering as that is given by the lock release below.
+ bool done =
+ tls32_.state_and_flags.as_atomic_int.CompareExchangeWeakRelaxed(old_state_and_flags.as_int,
+ new_state_and_flags.as_int);
+ if (LIKELY(done)) {
break;
}
}
@@ -141,9 +142,10 @@
union StateAndFlags new_state_and_flags;
new_state_and_flags.as_int = old_state_and_flags.as_int;
new_state_and_flags.as_struct.state = kRunnable;
- // CAS the value without a memory barrier, that occurred in the lock above.
- done = android_atomic_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
- &tls32_.state_and_flags.as_int) == 0;
+ // CAS the value without a memory ordering as that is given by the lock acquisition above.
+ done =
+ tls32_.state_and_flags.as_atomic_int.CompareExchangeWeakRelaxed(old_state_and_flags.as_int,
+ new_state_and_flags.as_int);
}
if (UNLIKELY(!done)) {
// Failed to transition to Runnable. Release shared mutator_lock_ access and try again.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index d60fb49..7827dfb 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -34,8 +34,6 @@
#include "base/mutex.h"
#include "class_linker.h"
#include "class_linker-inl.h"
-#include "cutils/atomic.h"
-#include "cutils/atomic-inline.h"
#include "debugger.h"
#include "dex_file-inl.h"
#include "entrypoints/entrypoint_utils.h"
@@ -591,14 +589,6 @@
#endif
}
-void Thread::AtomicSetFlag(ThreadFlag flag) {
- android_atomic_or(flag, &tls32_.state_and_flags.as_int);
-}
-
-void Thread::AtomicClearFlag(ThreadFlag flag) {
- android_atomic_and(-1 ^ flag, &tls32_.state_and_flags.as_int);
-}
-
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
static void UnsafeLogFatalForSuspendCount(Thread* self, Thread* thread) NO_THREAD_SAFETY_ANALYSIS {
LOG(ERROR) << *thread << " suspend count already zero.";
@@ -704,9 +694,10 @@
union StateAndFlags new_state_and_flags;
new_state_and_flags.as_int = old_state_and_flags.as_int;
new_state_and_flags.as_struct.flags |= kCheckpointRequest;
- int succeeded = android_atomic_acquire_cas(old_state_and_flags.as_int, new_state_and_flags.as_int,
- &tls32_.state_and_flags.as_int);
- if (UNLIKELY(succeeded != 0)) {
+ bool success =
+ tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(old_state_and_flags.as_int,
+ new_state_and_flags.as_int);
+ if (UNLIKELY(!success)) {
// The thread changed state before the checkpoint was installed.
CHECK_EQ(tlsPtr_.checkpoint_functions[available_checkpoint], function);
tlsPtr_.checkpoint_functions[available_checkpoint] = nullptr;
@@ -714,7 +705,7 @@
CHECK_EQ(ReadFlag(kCheckpointRequest), true);
TriggerSuspend();
}
- return succeeded == 0;
+ return success;
}
void Thread::FullSuspendCheck() {
diff --git a/runtime/thread.h b/runtime/thread.h
index 7cd86de..4312741 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -24,6 +24,7 @@
#include <memory>
#include <string>
+#include "atomic.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "entrypoints/interpreter/interpreter_entrypoints.h"
@@ -738,9 +739,13 @@
return (tls32_.state_and_flags.as_struct.flags != 0);
}
- void AtomicSetFlag(ThreadFlag flag);
+ void AtomicSetFlag(ThreadFlag flag) {
+ tls32_.state_and_flags.as_atomic_int.FetchAndOrSequentiallyConsistent(flag);
+ }
- void AtomicClearFlag(ThreadFlag flag);
+ void AtomicClearFlag(ThreadFlag flag) {
+ tls32_.state_and_flags.as_atomic_int.FetchAndAndSequentiallyConsistent(-1 ^ flag);
+ }
void ResetQuickAllocEntryPointsForThread();
@@ -864,6 +869,7 @@
// change to Runnable as a GC or other operation is in progress.
volatile uint16_t state;
} as_struct;
+ AtomicInteger as_atomic_int;
volatile int32_t as_int;
private:
@@ -871,6 +877,7 @@
// See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=47409
DISALLOW_COPY_AND_ASSIGN(StateAndFlags);
};
+ COMPILE_ASSERT(sizeof(StateAndFlags) == sizeof(int32_t), weird_state_and_flags_size);
static void ThreadExitCallback(void* arg);
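Editor's sketch: with as_atomic_int sharing storage with as_int and as_struct, AtomicSetFlag/AtomicClearFlag above become single fetch-and-or / fetch-and-and operations on the packed state-and-flags word. A standalone illustration with std::atomic follows; it is not ART code and the flag values are invented.

// Minimal sketch (not ART code): set/clear individual flag bits in a packed 32-bit word.
#include <atomic>
#include <cstdint>

enum ThreadFlagSketch : int32_t { kSuspendRequestSketch = 1, kCheckpointRequestSketch = 2 };

struct StateAndFlagsSketch {
  std::atomic<int32_t> as_atomic_int{0};

  void AtomicSetFlag(int32_t flag) {
    as_atomic_int.fetch_or(flag, std::memory_order_seq_cst);
  }
  void AtomicClearFlag(int32_t flag) {
    // ~flag is equivalent to the (-1 ^ flag) mask used in the diff above.
    as_atomic_int.fetch_and(~flag, std::memory_order_seq_cst);
  }
};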
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index d20a459..54732fa 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -153,8 +153,8 @@
#if HAVE_TIMED_RWLOCK
// Attempt to rectify locks so that we dump thread list with required locks before exiting.
-static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn));
-static void UnsafeLogFatalForThreadSuspendAllTimeout(Thread* self) {
+static void UnsafeLogFatalForThreadSuspendAllTimeout() NO_THREAD_SAFETY_ANALYSIS __attribute__((noreturn));
+static void UnsafeLogFatalForThreadSuspendAllTimeout() {
Runtime* runtime = Runtime::Current();
std::ostringstream ss;
ss << "Thread suspend timeout\n";
@@ -332,7 +332,7 @@
#if HAVE_TIMED_RWLOCK
// Timeout if we wait more than 30 seconds.
if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
- UnsafeLogFatalForThreadSuspendAllTimeout(self);
+ UnsafeLogFatalForThreadSuspendAllTimeout();
}
#else
Locks::mutator_lock_->ExclusiveLock(self);
@@ -351,6 +351,7 @@
void ThreadList::ResumeAll() {
Thread* self = Thread::Current();
+ DCHECK(self != nullptr);
VLOG(threads) << *self << " ResumeAll starting";
@@ -587,7 +588,7 @@
#if HAVE_TIMED_RWLOCK
// Timeout if we wait more than 30 seconds.
if (!Locks::mutator_lock_->ExclusiveLockWithTimeout(self, 30 * 1000, 0)) {
- UnsafeLogFatalForThreadSuspendAllTimeout(self);
+ UnsafeLogFatalForThreadSuspendAllTimeout();
} else {
Locks::mutator_lock_->ExclusiveUnlock(self);
}
diff --git a/runtime/thread_state.h b/runtime/thread_state.h
index 57bf4f1..0e47d21 100644
--- a/runtime/thread_state.h
+++ b/runtime/thread_state.h
@@ -38,6 +38,7 @@
kWaitingForSignalCatcherOutput, // WAITING TS_WAIT waiting for signal catcher IO to complete
kWaitingInMainSignalCatcherLoop, // WAITING TS_WAIT blocking/reading/processing signals
kWaitingForDeoptimization, // WAITING TS_WAIT waiting for deoptimization suspend all
+ kWaitingForMethodTracingStart, // WAITING TS_WAIT waiting for method tracing to start
kStarting, // NEW TS_WAIT native thread started, not yet ready to run managed code
kNative, // RUNNABLE TS_RUNNING running in a JNI native method
kSuspended, // RUNNABLE TS_RUNNING suspended by GC or debugger
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 032a566..1a450c4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -459,7 +459,7 @@
}
// Update current offset.
- cur_offset_ = kTraceHeaderLength;
+ cur_offset_.StoreRelaxed(kTraceHeaderLength);
}
static void DumpBuf(uint8_t* buf, size_t buf_size, ProfilerClockSource clock_source)
@@ -480,7 +480,7 @@
// Compute elapsed time.
uint64_t elapsed = MicroTime() - start_time_;
- size_t final_offset = cur_offset_;
+ size_t final_offset = cur_offset_.LoadRelaxed();
uint32_t clock_overhead_ns = GetClockOverheadNanoSeconds(this);
if ((flags_ & kTraceCountAllocs) != 0) {
@@ -623,13 +623,13 @@
int32_t new_offset;
int32_t old_offset;
do {
- old_offset = cur_offset_;
+ old_offset = cur_offset_.LoadRelaxed();
new_offset = old_offset + GetRecordSize(clock_source_);
if (new_offset > buffer_size_) {
overflow_ = true;
return;
}
- } while (android_atomic_release_cas(old_offset, new_offset, &cur_offset_) != 0);
+ } while (!cur_offset_.CompareExchangeWeakSequentiallyConsistent(old_offset, new_offset));
TraceAction action = kTraceMethodEnter;
switch (event) {
diff --git a/runtime/trace.h b/runtime/trace.h
index 08da16f..9c8d35b 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -23,6 +23,7 @@
#include <string>
#include <vector>
+#include "atomic.h"
#include "base/macros.h"
#include "globals.h"
#include "instrumentation.h"
@@ -65,11 +66,14 @@
static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags,
bool direct_to_ddms, bool sampling_enabled, int interval_us)
- LOCKS_EXCLUDED(Locks::mutator_lock_,
- Locks::thread_list_lock_,
- Locks::thread_suspend_count_lock_,
- Locks::trace_lock_);
- static void Stop() LOCKS_EXCLUDED(Locks::trace_lock_);
+ LOCKS_EXCLUDED(Locks::mutator_lock_,
+ Locks::thread_list_lock_,
+ Locks::thread_suspend_count_lock_,
+ Locks::trace_lock_);
+ static void Stop()
+ LOCKS_EXCLUDED(Locks::mutator_lock_,
+ Locks::thread_list_lock_,
+ Locks::trace_lock_);
static void Shutdown() LOCKS_EXCLUDED(Locks::trace_lock_);
static TracingMode GetMethodTracingMode() LOCKS_EXCLUDED(Locks::trace_lock_);
@@ -163,7 +167,7 @@
const uint64_t start_time_;
// Offset into buf_.
- volatile int32_t cur_offset_;
+ AtomicInteger cur_offset_;
// Did we overflow the buffer recording traces?
bool overflow_;
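Editor's sketch: the trace change above replaces android_atomic_release_cas on cur_offset_ with a compare-exchange loop on an AtomicInteger that reserves a record-sized slice of the shared buffer per event. A standalone illustration of that reservation follows; it is not ART code and the names are invented.

// Minimal sketch (not ART code): each writer claims [old_offset, old_offset + size)
// in a shared buffer, or flags overflow when the buffer is full.
#include <atomic>
#include <cstdint>

struct TraceBufferSketch {
  std::atomic<int32_t> cur_offset{0};
  int32_t buffer_size = 0;
  bool overflow = false;

  // Returns the start offset of the reserved record, or -1 on overflow.
  int32_t ReserveRecord(int32_t record_size) {
    int32_t old_offset;
    int32_t new_offset;
    do {
      old_offset = cur_offset.load(std::memory_order_relaxed);
      new_offset = old_offset + record_size;
      if (new_offset > buffer_size) {
        overflow = true;
        return -1;
      }
    } while (!cur_offset.compare_exchange_weak(old_offset, new_offset));
    return old_offset;
  }
};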
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index 26e7d31..5a5805f 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -101,11 +101,6 @@
}
const struct sigaction& action = user_sigactions[sig].GetAction();
-
- // Only deliver the signal if the signal was not masked out.
- if (sigismember(&action.sa_mask, sig)) {
- return;
- }
if ((action.sa_flags & SA_SIGINFO) == 0) {
if (action.sa_handler != NULL) {
action.sa_handler(sig);
diff --git a/test/082-inline-execute/src/Main.java b/test/082-inline-execute/src/Main.java
index 3b11879..f412034 100644
--- a/test/082-inline-execute/src/Main.java
+++ b/test/082-inline-execute/src/Main.java
@@ -15,9 +15,11 @@
*/
import junit.framework.Assert;
+import java.util.Arrays;
+import java.lang.reflect.Method;
public class Main {
- public static void main(String args[]) {
+ public static void main(String args[]) throws Exception {
test_Double_doubleToRawLongBits();
test_Double_longBitsToDouble();
test_Float_floatToRawIntBits();
@@ -50,6 +52,18 @@
test_String_isEmpty();
test_String_length();
test_Thread_currentThread();
+ initSupportMethodsForPeekPoke();
+ test_Memory_peekByte();
+ test_Memory_peekShort();
+ test_Memory_peekInt();
+ test_Memory_peekLong();
+ test_Memory_pokeByte();
+ test_Memory_pokeShort();
+ test_Memory_pokeInt();
+ test_Memory_pokeLong();
+ test_AtomicBoolean_compareAndSet();
+ test_AtomicInteger_compareAndSet();
+ test_AtomicLong_compareAndSet();
}
/*
@@ -82,6 +96,60 @@
Assert.assertNotNull(Thread.currentThread());
}
+ /**
+ * Will test inlining CAS, by inclusion of AtomicBoolean in core.oat.
+ */
+ public static void test_AtomicBoolean_compareAndSet() {
+ java.util.concurrent.atomic.AtomicBoolean ab = new java.util.concurrent.atomic.AtomicBoolean();
+ Assert.assertEquals(ab.compareAndSet(false, false), true);
+ Assert.assertEquals(ab.compareAndSet(true, false), false);
+ Assert.assertEquals(ab.compareAndSet(true, true), false);
+ Assert.assertEquals(ab.compareAndSet(false, true), true);
+ Assert.assertEquals(ab.compareAndSet(false, true), false);
+ Assert.assertEquals(ab.compareAndSet(false, false), false);
+ Assert.assertEquals(ab.compareAndSet(true, true), true);
+ Assert.assertEquals(ab.compareAndSet(true, false), true);
+ Assert.assertEquals(ab.compareAndSet(true, false), false);
+ Assert.assertEquals(ab.compareAndSet(true, true), false);
+ Assert.assertEquals(ab.compareAndSet(false, false), true);
+ }
+
+ /**
+ * Will test inlining CAS, by inclusion of AtomicInteger in core.oat.
+ */
+ public static void test_AtomicInteger_compareAndSet() {
+ java.util.concurrent.atomic.AtomicInteger ab = new java.util.concurrent.atomic.AtomicInteger();
+ Assert.assertEquals(ab.compareAndSet(0, 0), true);
+ Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
+ Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
+ Assert.assertEquals(ab.compareAndSet(0, 0x12345678), true);
+ Assert.assertEquals(ab.compareAndSet(0, 0x12345678), false);
+ Assert.assertEquals(ab.compareAndSet(0, 0), false);
+ Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), true);
+ Assert.assertEquals(ab.compareAndSet(0x12345678, 0), true);
+ Assert.assertEquals(ab.compareAndSet(0x12345678, 0), false);
+ Assert.assertEquals(ab.compareAndSet(0x12345678, 0x12345678), false);
+ Assert.assertEquals(ab.compareAndSet(0, 0), true);
+ }
+
+ /**
+ * Will test inlining CAS, by inclusion of AtomicLong in core.oat.
+ */
+ public static void test_AtomicLong_compareAndSet() {
+ java.util.concurrent.atomic.AtomicLong ab = new java.util.concurrent.atomic.AtomicLong();
+ Assert.assertEquals(ab.compareAndSet(0l, 0l), true);
+ Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), false);
+ Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), false);
+ Assert.assertEquals(ab.compareAndSet(0l, 0x1234567890l), true);
+ Assert.assertEquals(ab.compareAndSet(0l, 0x1234567890l), false);
+ Assert.assertEquals(ab.compareAndSet(0l, 0l), false);
+ Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), true);
+ Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), true);
+ Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0l), false);
+ Assert.assertEquals(ab.compareAndSet(0x1234567890l, 0x1234567890l), false);
+ Assert.assertEquals(ab.compareAndSet(0l, 0l), true);
+ }
+
public static void test_String_length() {
String str0 = "";
String str1 = "x";
@@ -510,4 +578,131 @@
Assert.assertEquals(Long.reverse(Long.MIN_VALUE), 1L);
}
+ static Object runtime;
+ static Method address_of;
+ static Method peek_byte;
+ static Method peek_short;
+ static Method peek_int;
+ static Method peek_long;
+ static Method poke_byte;
+ static Method poke_short;
+ static Method poke_int;
+ static Method poke_long;
+
+ public static void initSupportMethodsForPeekPoke() throws Exception {
+ Class<?> vm_runtime = Class.forName("dalvik.system.VMRuntime");
+ Method get_runtime = vm_runtime.getDeclaredMethod("getRuntime");
+ runtime = get_runtime.invoke(null);
+ address_of = vm_runtime.getDeclaredMethod("addressOf", Object.class);
+
+ Class<?> io_memory = Class.forName("libcore.io.Memory");
+ peek_byte = io_memory.getDeclaredMethod("peekByte", Long.TYPE);
+ peek_int = io_memory.getDeclaredMethod("peekInt", Long.TYPE, Boolean.TYPE);
+ peek_short = io_memory.getDeclaredMethod("peekShort", Long.TYPE, Boolean.TYPE);
+ peek_long = io_memory.getDeclaredMethod("peekLong", Long.TYPE, Boolean.TYPE);
+ poke_byte = io_memory.getDeclaredMethod("pokeByte", Long.TYPE, Byte.TYPE);
+ poke_short = io_memory.getDeclaredMethod("pokeShort", Long.TYPE, Short.TYPE, Boolean.TYPE);
+ poke_int = io_memory.getDeclaredMethod("pokeInt", Long.TYPE, Integer.TYPE, Boolean.TYPE);
+ poke_long = io_memory.getDeclaredMethod("pokeLong", Long.TYPE, Long.TYPE, Boolean.TYPE);
+ }
+
+ public static void test_Memory_peekByte() throws Exception {
+ byte[] b = new byte [2];
+ b[0] = 0x12;
+ b[1] = 0x11;
+ long address = (long)address_of.invoke(runtime, b);
+ Assert.assertEquals((byte)peek_byte.invoke(null, address), 0x12);
+ Assert.assertEquals((byte)peek_byte.invoke(null, address + 1), 0x11);
+ }
+
+ public static void test_Memory_peekShort() throws Exception {
+ byte[] b = new byte [3];
+ b[0] = 0x13;
+ b[1] = 0x12;
+ b[2] = 0x11;
+ long address = (long)address_of.invoke(runtime, b);
+ Assert.assertEquals((short)peek_short.invoke(null, address, false), 0x1213); // Aligned read
+ Assert.assertEquals((short)peek_short.invoke(null, address + 1, false), 0x1112); // Unaligned read
+ }
+
+ public static void test_Memory_peekInt() throws Exception {
+ byte[] b = new byte [5];
+ b[0] = 0x15;
+ b[1] = 0x14;
+ b[2] = 0x13;
+ b[3] = 0x12;
+ b[4] = 0x11;
+ long address = (long)address_of.invoke(runtime, b);
+ Assert.assertEquals((int)peek_int.invoke(null, address, false), 0x12131415);
+ Assert.assertEquals((int)peek_int.invoke(null, address + 1, false), 0x11121314);
+ }
+
+ public static void test_Memory_peekLong() throws Exception {
+ byte[] b = new byte [9];
+ b[0] = 0x19;
+ b[1] = 0x18;
+ b[2] = 0x17;
+ b[3] = 0x16;
+ b[4] = 0x15;
+ b[5] = 0x14;
+ b[6] = 0x13;
+ b[7] = 0x12;
+ b[8] = 0x11;
+ long address = (long)address_of.invoke(runtime, b);
+ Assert.assertEquals((long)peek_long.invoke(null, address, false), 0x1213141516171819L);
+ Assert.assertEquals((long)peek_long.invoke(null, address + 1, false), 0x1112131415161718L);
+ }
+
+ public static void test_Memory_pokeByte() throws Exception {
+ byte[] r = {0x11, 0x12};
+ byte[] b = new byte [2];
+ long address = (long)address_of.invoke(runtime, b);
+ poke_byte.invoke(null, address, (byte)0x11);
+ poke_byte.invoke(null, address + 1, (byte)0x12);
+ Assert.assertTrue(Arrays.equals(r, b));
+ }
+
+ public static void test_Memory_pokeShort() throws Exception {
+ byte[] ra = {0x12, 0x11, 0x13};
+ byte[] ru = {0x12, 0x22, 0x21};
+ byte[] b = new byte [3];
+ long address = (long)address_of.invoke(runtime, b);
+
+ // Aligned write
+ b[2] = 0x13;
+ poke_short.invoke(null, address, (short)0x1112, false);
+ Assert.assertTrue(Arrays.equals(ra, b));
+
+ // Unaligned write
+ poke_short.invoke(null, address + 1, (short)0x2122, false);
+ Assert.assertTrue(Arrays.equals(ru, b));
+ }
+
+ public static void test_Memory_pokeInt() throws Exception {
+ byte[] ra = {0x14, 0x13, 0x12, 0x11, 0x15};
+ byte[] ru = {0x14, 0x24, 0x23, 0x22, 0x21};
+ byte[] b = new byte [5];
+ long address = (long)address_of.invoke(runtime, b);
+
+ b[4] = 0x15;
+ poke_int.invoke(null, address, (int)0x11121314, false);
+ Assert.assertTrue(Arrays.equals(ra, b));
+
+ poke_int.invoke(null, address + 1, (int)0x21222324, false);
+ Assert.assertTrue(Arrays.equals(ru, b));
+ }
+
+ public static void test_Memory_pokeLong() throws Exception {
+ byte[] ra = {0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x19};
+ byte[] ru = {0x18, 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21};
+ byte[] b = new byte [9];
+ long address = (long)address_of.invoke(runtime, b);
+
+ b[8] = 0x19;
+ poke_long.invoke(null, address, (long)0x1112131415161718L, false);
+ Assert.assertTrue(Arrays.equals(ra, b));
+
+ poke_long.invoke(null, address + 1, (long)0x2122232425262728L, false);
+ Assert.assertTrue(Arrays.equals(ru, b));
+ }
}
diff --git a/test/304-method-tracing/expected.txt b/test/304-method-tracing/expected.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/304-method-tracing/expected.txt
diff --git a/test/304-method-tracing/info.txt b/test/304-method-tracing/info.txt
new file mode 100644
index 0000000..d3154e6
--- /dev/null
+++ b/test/304-method-tracing/info.txt
@@ -0,0 +1 @@
+Test method tracing from command-line.
diff --git a/test/304-method-tracing/run b/test/304-method-tracing/run
new file mode 100755
index 0000000..7bd1895
--- /dev/null
+++ b/test/304-method-tracing/run
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Runs the test with method tracing enabled.
+exec ${RUN} "$@" --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin
diff --git a/test/304-method-tracing/src/Main.java b/test/304-method-tracing/src/Main.java
new file mode 100644
index 0000000..25cee6d
--- /dev/null
+++ b/test/304-method-tracing/src/Main.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+public class Main {
+ static class ThreadRunnable implements Runnable {
+ public void run() {
+ for (int i = 0; i < 1000; ++i) {
+ doNothing();
+ }
+ }
+
+ private void doNothing() {}
+ }
+
+ public static void main(String[] args) {
+ ArrayList<Thread> threads = new ArrayList<Thread>();
+ for (int i = 0; i < 10; ++i) {
+ threads.add(new Thread(new ThreadRunnable(), "TestThread-" + i));
+ }
+
+ for (Thread t : threads) {
+ t.start();
+ }
+
+ for (Thread t : threads) {
+ try {
+ t.join();
+ } catch (InterruptedException e) {
+ System.out.println("Thread " + t.getName() + " has been interrupted");
+ }
+ }
+ }
+}
diff --git a/test/Android.oat.mk b/test/Android.oat.mk
index fec2540e..16300bb 100644
--- a/test/Android.oat.mk
+++ b/test/Android.oat.mk
@@ -193,7 +193,7 @@
$(3): $$(ART_TEST_HOST_OAT_$(1)_DEX) $(ART_TEST_HOST_OAT_DEPENDENCIES)
$(hide) mkdir -p $(ART_HOST_TEST_DIR)/android-data-$$@/dalvik-cache/$$($(2)HOST_ARCH)
$(hide) cp $$(realpath $$<) $(ART_HOST_TEST_DIR)/android-data-$$@/oat-test-dex-$(1).jar
- $(hide) $(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg $(DEX2OAT_XMS) --runtime-arg $(DEX2OAT_XMX) $(4) \
+ $(hide) $(DEX2OATD) $(DEX2OAT_FLAGS) --runtime-arg -Xms$(DEX2OAT_XMS) --runtime-arg -Xmx$(DEX2OAT_XMX) $(4) \
--boot-image=$$(HOST_CORE_IMG_LOCATION) \
--dex-file=$$(PRIVATE_DEX_FILE) --oat-file=$$(PRIVATE_OAT_FILE) \
--instruction-set=$($(2)ART_HOST_ARCH) --host --android-root=$(HOST_OUT) \
diff --git a/test/run-all-tests b/test/run-all-tests
index 885ee44..25d5c5f 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -80,6 +80,9 @@
elif [ "x$1" = "x--64" ]; then
run_args="${run_args} --64"
shift
+ elif [ "x$1" = "x--trace" ]; then
+ run_args="${run_args} --trace"
+ shift
elif expr "x$1" : "x--" >/dev/null 2>&1; then
echo "unknown $0 option: $1" 1>&2
usage="yes"
diff --git a/test/run-test b/test/run-test
index d1c5bb2..2989f25 100755
--- a/test/run-test
+++ b/test/run-test
@@ -64,7 +64,6 @@
target_mode="yes"
dev_mode="no"
update_mode="no"
-debug_mode="no"
runtime="art"
usage="no"
build_only="no"
@@ -162,6 +161,9 @@
run_args="${run_args} --64"
suffix64="64"
shift
+ elif [ "x$1" = "x--trace" ]; then
+ run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin"
+ shift
elif expr "x$1" : "x--" >/dev/null 2>&1; then
echo "unknown $0 option: $1" 1>&2
usage="yes"
@@ -257,6 +259,7 @@
echo " --output-path [path] Location where to store the build" \
"files."
echo " --64 Run the test in 64-bit mode"
+ echo " --trace Run with method tracing"
) 1>&2
exit 1
fi