/*
* Copyright 2021 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <fcntl.h>
// Glibc v2.19 doesn't include these in fcntl.h, so host builds will fail without them.
#if !defined(FALLOC_FL_PUNCH_HOLE) || !defined(FALLOC_FL_KEEP_SIZE)
#include <linux/falloc.h>
#endif
#include <linux/userfaultfd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <fstream>
#include <numeric>
#include <string>
#include <string_view>
#include <vector>
#include "android-base/file.h"
#include "android-base/parsebool.h"
#include "android-base/parseint.h"
#include "android-base/properties.h"
#include "android-base/strings.h"
#include "base/file_utils.h"
#include "base/memfd.h"
#include "base/quasi_atomic.h"
#include "base/systrace.h"
#include "base/utils.h"
#include "gc/accounting/mod_union_table-inl.h"
#include "gc/collector_type.h"
#include "gc/reference_processor.h"
#include "gc/space/bump_pointer_space.h"
#include "gc/space/space-inl.h"
#include "gc/task_processor.h"
#include "gc/verification-inl.h"
#include "jit/jit_code_cache.h"
#include "mark_compact-inl.h"
#include "mirror/object-refvisitor-inl.h"
#include "read_barrier_config.h"
#include "scoped_thread_state_change-inl.h"
#include "sigchain.h"
#include "thread_list.h"
#ifdef ART_TARGET_ANDROID
#include "android-modules-utils/sdk_level.h"
#include "com_android_art.h"
#include "com_android_art_flags.h"
#endif
// See aosp/2996596 for where these values came from.
#ifndef UFFDIO_COPY_MODE_MMAP_TRYLOCK
#define UFFDIO_COPY_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
#endif
#ifndef UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK
#define UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK (static_cast<uint64_t>(1) << 63)
#endif
#ifdef ART_TARGET_ANDROID
namespace {
using ::android::base::GetBoolProperty;
using ::android::base::ParseBool;
using ::android::base::ParseBoolResult;
using ::android::modules::sdklevel::IsAtLeastV;
} // namespace
#endif
namespace art HIDDEN {
static bool HaveMremapDontunmap() {
const size_t page_size = GetPageSizeSlow();
void* old = mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
CHECK_NE(old, MAP_FAILED);
void* addr = mremap(old, page_size, page_size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, nullptr);
CHECK_EQ(munmap(old, page_size), 0);
if (addr != MAP_FAILED) {
CHECK_EQ(munmap(addr, page_size), 0);
return true;
} else {
return false;
}
}
static bool gUffdSupportsMmapTrylock = false;
// We require the MREMAP_DONTUNMAP functionality of the mremap syscall, which was
// introduced in kernel version 5.13 but has been backported to GKI kernels.
static bool gHaveMremapDontunmap = IsKernelVersionAtLeast(5, 13) || HaveMremapDontunmap();
// Bitmap of features supported by userfaultfd. This is obtained via uffd API ioctl.
static uint64_t gUffdFeatures = 0;
// Both missing and minor faults on shmem are needed only for minor-fault mode.
static constexpr uint64_t kUffdFeaturesForMinorFault =
UFFD_FEATURE_MISSING_SHMEM | UFFD_FEATURE_MINOR_SHMEM;
static constexpr uint64_t kUffdFeaturesForSigbus = UFFD_FEATURE_SIGBUS;
// A region which is more than kBlackDenseRegionThreshold percent live doesn't
// need to be compacted as it is too densely packed.
static constexpr uint kBlackDenseRegionThreshold = 95U;
// We consider the SIGBUS feature necessary to enable this GC, as it's superior to the
// threading-based implementation with respect to jank. We may want the minor-fault feature
// in the future to make updates to the jit-code-cache, which uses shmem, concurrent.
bool KernelSupportsUffd() {
#ifdef __linux__
if (gHaveMremapDontunmap) {
int fd = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
// On non-android devices we may not have the kernel patches that restrict
// userfaultfd to user mode. But that is not a security concern as we are
// on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
if (!kIsTargetAndroid && fd == -1 && errno == EINVAL) {
fd = syscall(__NR_userfaultfd, O_CLOEXEC);
}
if (fd >= 0) {
      // We are only fetching the available features, which are returned by the
      // ioctl.
struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
CHECK_EQ(ioctl(fd, UFFDIO_API, &api), 0) << "ioctl_userfaultfd : API:" << strerror(errno);
gUffdFeatures = api.features;
      // MMAP_TRYLOCK is available only in the 5.10 and 5.15 GKI kernels. Higher
      // versions have per-VMA locks, while lower ones don't support
      // userfaultfd.
if (kIsTargetAndroid && !IsKernelVersionAtLeast(5, 16)) {
// Check if MMAP_TRYLOCK feature is supported
const size_t page_size = GetPageSizeSlow();
void* mem =
mmap(nullptr, page_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
CHECK_NE(mem, MAP_FAILED) << " errno: " << errno;
struct uffdio_zeropage uffd_zeropage;
uffd_zeropage.mode = UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK;
uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(mem);
uffd_zeropage.range.len = page_size;
uffd_zeropage.zeropage = 0;
// The ioctl will definitely fail as mem is not registered with uffd.
CHECK_EQ(ioctl(fd, UFFDIO_ZEROPAGE, &uffd_zeropage), -1);
        // uffd ioctls return EINVAL for several reasons. By ensuring proper alignment
        // of 'mem' and 'len', we make sure that, before uffd_zeropage.zeropage is
        // updated (with an error), the ioctl fails with EINVAL only if `trylock`
        // isn't available.
if (uffd_zeropage.zeropage == 0 && errno == EINVAL) {
LOG(INFO) << "MMAP_TRYLOCK is not supported in uffd addr:" << mem
<< " page-size:" << page_size;
} else {
gUffdSupportsMmapTrylock = true;
LOG(INFO) << "MMAP_TRYLOCK is supported in uffd errno:" << errno << " addr:" << mem
<< " size:" << page_size;
}
munmap(mem, page_size);
}
close(fd);
      // The minimum we need to use userfaultfd is the SIGBUS feature.
return (api.features & kUffdFeaturesForSigbus) == kUffdFeaturesForSigbus;
}
}
#endif
return false;
}
// The other cases are defined as constexpr in runtime/read_barrier_config.h
#if !defined(ART_FORCE_USE_READ_BARRIER) && defined(ART_USE_READ_BARRIER)
// Returns collector type asked to be used on the cmdline.
static gc::CollectorType FetchCmdlineGcType() {
std::string argv;
gc::CollectorType gc_type = gc::CollectorType::kCollectorTypeNone;
if (android::base::ReadFileToString("/proc/self/cmdline", &argv)) {
    auto pos = argv.rfind("-Xgc:");
    // Guard against "-Xgc:" being absent from the cmdline; rfind() returns npos in that case.
    if (pos != std::string::npos) {
      if (argv.substr(pos + 5, 3) == "CMC") {
        gc_type = gc::CollectorType::kCollectorTypeCMC;
      } else if (argv.substr(pos + 5, 2) == "CC") {
        gc_type = gc::CollectorType::kCollectorTypeCC;
      }
    }
}
return gc_type;
}
#ifdef ART_TARGET_ANDROID
static int GetOverrideCacheInfoFd() {
std::string args_str;
if (!android::base::ReadFileToString("/proc/self/cmdline", &args_str)) {
LOG(WARNING) << "Failed to load /proc/self/cmdline";
return -1;
}
std::vector<std::string_view> args;
Split(std::string_view(args_str), /*separator=*/'\0', &args);
for (std::string_view arg : args) {
if (android::base::ConsumePrefix(&arg, "--cache-info-fd=")) { // This is a dex2oat flag.
int fd;
if (!android::base::ParseInt(std::string(arg), &fd)) {
LOG(ERROR) << "Failed to parse --cache-info-fd (value: '" << arg << "')";
return -1;
}
return fd;
}
}
return -1;
}
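// Reads the cache-info XML, either from the fd passed via --cache-info-fd or from the
// default path under the ART APEX data dalvik-cache directory, and returns the system
// properties recorded in it as a key-value map. Returns an empty map if the file is
// missing or cannot be parsed.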
static std::unordered_map<std::string, std::string> GetCachedProperties() {
// For simplicity, we don't handle multiple calls because otherwise we would have to reset the fd.
static bool called = false;
  CHECK(!called) << "GetCachedProperties can be called only once";
called = true;
std::string cache_info_contents;
int fd = GetOverrideCacheInfoFd();
if (fd >= 0) {
if (!android::base::ReadFdToString(fd, &cache_info_contents)) {
PLOG(ERROR) << "Failed to read cache-info from fd " << fd;
return {};
}
} else {
std::string path = GetApexDataDalvikCacheDirectory(InstructionSet::kNone) + "/cache-info.xml";
if (!android::base::ReadFileToString(path, &cache_info_contents)) {
// If the file is not found, then we are in chroot or in a standalone runtime process (e.g.,
// IncidentHelper), or odsign/odrefresh failed to generate and sign the cache info. There's
// nothing we can do.
if (errno != ENOENT) {
PLOG(ERROR) << "Failed to read cache-info from the default path";
}
return {};
}
}
std::optional<com::android::art::CacheInfo> cache_info =
com::android::art::parse(cache_info_contents.c_str());
if (!cache_info.has_value()) {
// This should never happen.
LOG(ERROR) << "Failed to parse cache-info";
return {};
}
const com::android::art::KeyValuePairList* list = cache_info->getFirstSystemProperties();
if (list == nullptr) {
// This should never happen.
LOG(ERROR) << "Missing system properties from cache-info";
return {};
}
const std::vector<com::android::art::KeyValuePair>& properties = list->getItem();
std::unordered_map<std::string, std::string> result;
for (const com::android::art::KeyValuePair& pair : properties) {
result[pair.getK()] = pair.getV();
}
return result;
}
static bool GetCachedBoolProperty(
const std::unordered_map<std::string, std::string>& cached_properties,
const std::string& key,
bool default_value) {
auto it = cached_properties.find(key);
if (it == cached_properties.end()) {
return default_value;
}
ParseBoolResult result = ParseBool(it->second);
switch (result) {
case ParseBoolResult::kTrue:
return true;
case ParseBoolResult::kFalse:
return false;
case ParseBoolResult::kError:
return default_value;
}
}
static bool SysPropSaysUffdGc() {
  // The phenotype flag can change at any time after boot, but it shouldn't take effect until a
// reboot. Therefore, we read the phenotype flag from the cache info, which is generated on boot.
std::unordered_map<std::string, std::string> cached_properties = GetCachedProperties();
bool phenotype_enable = GetCachedBoolProperty(
cached_properties, "persist.device_config.runtime_native_boot.enable_uffd_gc_2", false);
bool phenotype_force_disable = GetCachedBoolProperty(
cached_properties, "persist.device_config.runtime_native_boot.force_disable_uffd_gc", false);
bool build_enable = GetBoolProperty("ro.dalvik.vm.enable_uffd_gc", false);
bool is_at_most_u = !IsAtLeastV();
return (phenotype_enable || build_enable || is_at_most_u) && !phenotype_force_disable;
}
#else
// Never called.
static bool SysPropSaysUffdGc() { return false; }
#endif
static bool ShouldUseUserfaultfd() {
static_assert(kUseBakerReadBarrier || kUseTableLookupReadBarrier);
#ifdef __linux__
// Use CMC/CC if that is being explicitly asked for on cmdline. Otherwise,
// always use CC on host. On target, use CMC only if system properties says so
// and the kernel supports it.
gc::CollectorType gc_type = FetchCmdlineGcType();
return gc_type == gc::CollectorType::kCollectorTypeCMC ||
(gc_type == gc::CollectorType::kCollectorTypeNone &&
kIsTargetAndroid &&
SysPropSaysUffdGc() &&
KernelSupportsUffd());
#else
return false;
#endif
}
const bool gUseUserfaultfd = ShouldUseUserfaultfd();
const bool gUseReadBarrier = !gUseUserfaultfd;
#endif
#ifdef ART_TARGET_ANDROID
bool ShouldUseGenerationalGC() {
if (gUseUserfaultfd && !com::android::art::flags::use_generational_cmc()) {
return false;
}
  // The generational GC feature doesn't need a reboot. Any process (like dex2oat)
  // can pick a different value than the zygote and will still be able to execute.
return GetBoolProperty("persist.device_config.runtime_native_boot.use_generational_gc", true);
}
#else
bool ShouldUseGenerationalGC() { return true; }
#endif
namespace gc {
namespace collector {
// Turn off kCheckLocks when profiling the GC as it slows down the GC
// significantly.
static constexpr bool kCheckLocks = kDebugLocking;
static constexpr bool kVerifyRootsMarked = kIsDebugBuild;
// Verify that there are no missing card marks.
static constexpr bool kVerifyNoMissingCardMarks = kIsDebugBuild;
// Verify that all references in post-GC objects are valid.
static constexpr bool kVerifyPostGCObjects = kIsDebugBuild;
// Assert during marking that GC-roots are valid.
static constexpr bool kVerifyGcRootDuringMarking = kIsDebugBuild;
// Number of compaction buffers reserved for mutator threads in SIGBUS feature
// case. It's extremely unlikely that we will ever have more than this number
// of mutator threads trying to access the moving-space during one compaction
// phase.
static constexpr size_t kMutatorCompactionBufferCount = 2048;
// Minimum from-space chunk to be madvised (during concurrent compaction) in one go.
// Choose a reasonable size to avoid making too many batched ioctl and madvise calls.
static constexpr ssize_t kMinFromSpaceMadviseSize = 8 * MB;
// Concurrent compaction termination logic is different (and slightly more efficient) if the
// kernel has the fault-retry feature (allowing repeated faults on the same page), which was
// introduced in 5.7 (https://android-review.git.corp.google.com/c/kernel/common/+/1540088).
// This allows a single page fault to be handled, in turn, by each worker thread, only waking
// up the GC thread at the end.
static const bool gKernelHasFaultRetry = IsKernelVersionAtLeast(5, 7);
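// Returns a pair of <userfaultfd available, minor-fault feature available>. Relies on
// gUffdFeatures, which is populated by KernelSupportsUffd(); probes the kernel if it is
// still 0.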
std::pair<bool, bool> MarkCompact::GetUffdAndMinorFault() {
bool uffd_available;
// In most cases the gUffdFeatures will already be initialized at boot time
// when libart is loaded. On very old kernels we may get '0' from the kernel,
// in which case we would be doing the syscalls each time this function is
  // called. But that's a very unlikely case. There are no correctness issues, as
  // the response from the kernel never changes after boot.
if (UNLIKELY(gUffdFeatures == 0)) {
uffd_available = KernelSupportsUffd();
} else {
// We can have any uffd features only if uffd exists.
uffd_available = true;
}
bool minor_fault_available =
(gUffdFeatures & kUffdFeaturesForMinorFault) == kUffdFeaturesForMinorFault;
return std::pair<bool, bool>(uffd_available, minor_fault_available);
}
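// Opens the userfaultfd (again after fork if 'post_fork' is true) and enables the SIGBUS
// feature via UFFDIO_API. If the kernel doesn't support userfaultfd, sets uffd_ to
// kFallbackMode, which implies stop-the-world compaction. Returns true if a valid fd was
// obtained.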
bool MarkCompact::CreateUserfaultfd(bool post_fork) {
if (post_fork || uffd_ == kFdUnused) {
// Check if we have MREMAP_DONTUNMAP here for cases where
// 'ART_USE_READ_BARRIER=false' is used. Additionally, this check ensures
// that userfaultfd isn't used on old kernels, which cause random ioctl
// failures.
if (gHaveMremapDontunmap) {
// Don't use O_NONBLOCK as we rely on read waiting on uffd_ if there isn't
// any read event available. We don't use poll.
uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC | UFFD_USER_MODE_ONLY);
// On non-android devices we may not have the kernel patches that restrict
// userfaultfd to user mode. But that is not a security concern as we are
// on host. Therefore, attempt one more time without UFFD_USER_MODE_ONLY.
if (!kIsTargetAndroid && UNLIKELY(uffd_ == -1 && errno == EINVAL)) {
uffd_ = syscall(__NR_userfaultfd, O_CLOEXEC);
}
if (UNLIKELY(uffd_ == -1)) {
uffd_ = kFallbackMode;
LOG(WARNING) << "Userfaultfd isn't supported (reason: " << strerror(errno)
<< ") and therefore falling back to stop-the-world compaction.";
} else {
DCHECK(IsValidFd(uffd_));
// Initialize uffd with the features which are required and available.
// Using private anonymous mapping in threading mode is the default,
// for which we don't need to ask for any features. Note: this mode
// is not used in production.
struct uffdio_api api = {.api = UFFD_API, .features = 0, .ioctls = 0};
// We should add SIGBUS feature only if we plan on using it as
// requesting it here will mean threading mode will not work.
CHECK_EQ(gUffdFeatures & kUffdFeaturesForSigbus, kUffdFeaturesForSigbus);
api.features |= kUffdFeaturesForSigbus;
CHECK_EQ(ioctl(uffd_, UFFDIO_API, &api), 0)
<< "ioctl_userfaultfd: API: " << strerror(errno);
}
} else {
uffd_ = kFallbackMode;
}
}
uffd_initialized_ = !post_fork || uffd_ == kFallbackMode;
return IsValidFd(uffd_);
}
template <size_t kAlignment>
MarkCompact::LiveWordsBitmap<kAlignment>* MarkCompact::LiveWordsBitmap<kAlignment>::Create(
uintptr_t begin, uintptr_t end) {
return static_cast<LiveWordsBitmap<kAlignment>*>(
MemRangeBitmap::Create("Concurrent Mark Compact live words bitmap", begin, end));
}
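// Layout of the info map (a single anonymous mapping): the chunk-info vector (one
// uint32_t of live bytes per kOffsetChunkSize chunk of the moving space), followed by
// per moving-space-page arrays of first-object references, pre-compact offsets and
// atomic page-status words, and finally an array of first-object references for the
// non-moving-space pages. See InitializeInfoMap() for how the arrays are carved out.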
size_t MarkCompact::ComputeInfoMapSize() {
size_t moving_space_size = bump_pointer_space_->Capacity();
size_t chunk_info_vec_size = moving_space_size / kOffsetChunkSize;
size_t nr_moving_pages = DivideByPageSize(moving_space_size);
size_t nr_non_moving_pages = DivideByPageSize(heap_->GetNonMovingSpace()->Capacity());
return chunk_info_vec_size * sizeof(uint32_t) + nr_non_moving_pages * sizeof(ObjReference) +
nr_moving_pages * (sizeof(ObjReference) + sizeof(uint32_t) + sizeof(Atomic<uint32_t>));
}
size_t MarkCompact::InitializeInfoMap(uint8_t* p, size_t moving_space_sz) {
size_t nr_moving_pages = DivideByPageSize(moving_space_sz);
chunk_info_vec_ = reinterpret_cast<uint32_t*>(p);
vector_length_ = moving_space_sz / kOffsetChunkSize;
size_t total = vector_length_ * sizeof(uint32_t);
first_objs_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
total += nr_moving_pages * sizeof(ObjReference);
pre_compact_offset_moving_space_ = reinterpret_cast<uint32_t*>(p + total);
total += nr_moving_pages * sizeof(uint32_t);
moving_pages_status_ = reinterpret_cast<Atomic<uint32_t>*>(p + total);
total += nr_moving_pages * sizeof(Atomic<uint32_t>);
first_objs_non_moving_space_ = reinterpret_cast<ObjReference*>(p + total);
total += DivideByPageSize(heap_->GetNonMovingSpace()->Capacity()) * sizeof(ObjReference);
DCHECK_EQ(total, ComputeInfoMapSize());
return total;
}
YoungMarkCompact::YoungMarkCompact(Heap* heap, MarkCompact* main)
: GarbageCollector(heap, "young concurrent mark compact"), main_collector_(main) {
// Initialize GC metrics.
metrics::ArtMetrics* metrics = GetMetrics();
gc_time_histogram_ = metrics->YoungGcCollectionTime();
metrics_gc_count_ = metrics->YoungGcCount();
metrics_gc_count_delta_ = metrics->YoungGcCountDelta();
gc_throughput_histogram_ = metrics->YoungGcThroughput();
gc_tracing_throughput_hist_ = metrics->YoungGcTracingThroughput();
gc_throughput_avg_ = metrics->YoungGcThroughputAvg();
gc_tracing_throughput_avg_ = metrics->YoungGcTracingThroughputAvg();
gc_scanned_bytes_ = metrics->YoungGcScannedBytes();
gc_scanned_bytes_delta_ = metrics->YoungGcScannedBytesDelta();
gc_freed_bytes_ = metrics->YoungGcFreedBytes();
gc_freed_bytes_delta_ = metrics->YoungGcFreedBytesDelta();
gc_duration_ = metrics->YoungGcDuration();
gc_duration_delta_ = metrics->YoungGcDurationDelta();
gc_app_slow_path_during_gc_duration_delta_ = metrics->AppSlowPathDuringYoungGcDurationDelta();
are_metrics_initialized_ = true;
}
void YoungMarkCompact::RunPhases() {
DCHECK(!main_collector_->young_gen_);
main_collector_->young_gen_ = true;
main_collector_->RunPhases();
main_collector_->young_gen_ = false;
}
MarkCompact::MarkCompact(Heap* heap)
: GarbageCollector(heap, "concurrent mark compact"),
overflow_arrays_(nullptr),
gc_barrier_(0),
lock_("mark compact lock", kGenericBottomLock),
sigbus_in_progress_count_{kSigbusCounterCompactionDoneMask, kSigbusCounterCompactionDoneMask},
mid_to_old_promo_bit_vec_(nullptr),
bump_pointer_space_(heap->GetBumpPointerSpace()),
post_compact_end_(nullptr),
young_gen_(false),
use_generational_(heap->GetUseGenerational()),
compacting_(false),
moving_space_bitmap_(bump_pointer_space_->GetMarkBitmap()),
moving_space_begin_(bump_pointer_space_->Begin()),
moving_space_end_(bump_pointer_space_->Limit()),
black_dense_end_(moving_space_begin_),
mid_gen_end_(moving_space_begin_),
uffd_(kFdUnused),
marking_done_(false),
uffd_initialized_(false),
clamp_info_map_status_(ClampInfoStatus::kClampInfoNotDone) {
if (kIsDebugBuild) {
updated_roots_.reset(new std::unordered_set<void*>());
}
if (gUffdFeatures == 0) {
GetUffdAndMinorFault();
}
uint8_t* moving_space_begin = bump_pointer_space_->Begin();
  // TODO: This depends on how the bump-pointer space move is implemented. If we
  // switch between two virtual memories each time, then we will have to
  // initialize live_words_bitmap_ accordingly.
live_words_bitmap_.reset(LiveWordsBitmap<kAlignment>::Create(
reinterpret_cast<uintptr_t>(moving_space_begin),
reinterpret_cast<uintptr_t>(bump_pointer_space_->Limit())));
std::string err_msg;
size_t moving_space_size = bump_pointer_space_->Capacity();
{
// Create one MemMap for all the data structures
info_map_ = MemMap::MapAnonymous("Concurrent mark-compact chunk-info vector",
ComputeInfoMapSize(),
PROT_READ | PROT_WRITE,
/*low_4gb=*/false,
&err_msg);
if (UNLIKELY(!info_map_.IsValid())) {
LOG(FATAL) << "Failed to allocate concurrent mark-compact chunk-info vector: " << err_msg;
} else {
size_t total = InitializeInfoMap(info_map_.Begin(), moving_space_size);
DCHECK_EQ(total, info_map_.Size());
}
}
size_t moving_space_alignment = Heap::BestPageTableAlignment(moving_space_size);
// The moving space is created at a fixed address, which is expected to be
// PMD-size aligned.
if (!IsAlignedParam(moving_space_begin, moving_space_alignment)) {
LOG(WARNING) << "Bump pointer space is not aligned to " << PrettySize(moving_space_alignment)
<< ". This can lead to longer stop-the-world pauses for compaction";
}
// NOTE: PROT_NONE is used here as these mappings are for address space reservation
// only and will be used only after appropriately remapping them.
from_space_map_ = MemMap::MapAnonymousAligned("Concurrent mark-compact from-space",
moving_space_size,
PROT_NONE,
/*low_4gb=*/kObjPtrPoisoning,
moving_space_alignment,
&err_msg);
if (UNLIKELY(!from_space_map_.IsValid())) {
LOG(FATAL) << "Failed to allocate concurrent mark-compact from-space" << err_msg;
} else {
from_space_begin_ = from_space_map_.Begin();
}
compaction_buffers_map_ = MemMap::MapAnonymous("Concurrent mark-compact compaction buffers",
(1 + kMutatorCompactionBufferCount) * gPageSize,
PROT_READ | PROT_WRITE,
/*low_4gb=*/kObjPtrPoisoning,
&err_msg);
if (UNLIKELY(!compaction_buffers_map_.IsValid())) {
LOG(FATAL) << "Failed to allocate concurrent mark-compact compaction buffers" << err_msg;
}
// We also use the first page-sized buffer for the purpose of terminating concurrent compaction.
conc_compaction_termination_page_ = compaction_buffers_map_.Begin();
// Touch the page deliberately to avoid userfaults on it. We madvise it in
// CompactionPhase() before using it to terminate concurrent compaction.
ForceRead(conc_compaction_termination_page_);
  // In most cases, we don't expect more than one LinearAlloc space.
linear_alloc_spaces_data_.reserve(1);
// Initialize GC metrics.
metrics::ArtMetrics* metrics = GetMetrics();
gc_time_histogram_ = metrics->FullGcCollectionTime();
metrics_gc_count_ = metrics->FullGcCount();
metrics_gc_count_delta_ = metrics->FullGcCountDelta();
gc_throughput_histogram_ = metrics->FullGcThroughput();
gc_tracing_throughput_hist_ = metrics->FullGcTracingThroughput();
gc_throughput_avg_ = metrics->FullGcThroughputAvg();
gc_tracing_throughput_avg_ = metrics->FullGcTracingThroughputAvg();
gc_scanned_bytes_ = metrics->FullGcScannedBytes();
gc_scanned_bytes_delta_ = metrics->FullGcScannedBytesDelta();
gc_freed_bytes_ = metrics->FullGcFreedBytes();
gc_freed_bytes_delta_ = metrics->FullGcFreedBytesDelta();
gc_duration_ = metrics->FullGcDuration();
gc_duration_delta_ = metrics->FullGcDurationDelta();
gc_app_slow_path_during_gc_duration_delta_ = metrics->AppSlowPathDuringFullGcDurationDelta();
are_metrics_initialized_ = true;
}
void MarkCompact::ResetGenerationalState() {
black_dense_end_ = mid_gen_end_ = moving_space_begin_;
post_compact_end_ = nullptr;
class_after_obj_map_.clear();
}
void MarkCompact::AddLinearAllocSpaceData(uint8_t* begin, size_t len) {
DCHECK_ALIGNED_PARAM(begin, gPageSize);
DCHECK_ALIGNED_PARAM(len, gPageSize);
DCHECK_GE(len, Heap::GetPMDSize());
size_t alignment = Heap::BestPageTableAlignment(len);
std::string err_msg;
MemMap shadow(MemMap::MapAnonymousAligned("linear-alloc shadow map",
len,
PROT_NONE,
/*low_4gb=*/false,
alignment,
&err_msg));
if (!shadow.IsValid()) {
LOG(FATAL) << "Failed to allocate linear-alloc shadow map: " << err_msg;
UNREACHABLE();
}
MemMap page_status_map(MemMap::MapAnonymous("linear-alloc page-status map",
DivideByPageSize(len),
PROT_READ | PROT_WRITE,
/*low_4gb=*/false,
&err_msg));
if (!page_status_map.IsValid()) {
LOG(FATAL) << "Failed to allocate linear-alloc page-status shadow map: " << err_msg;
UNREACHABLE();
}
linear_alloc_spaces_data_.emplace_back(
std::forward<MemMap>(shadow), std::forward<MemMap>(page_status_map), begin, begin + len);
}
void MarkCompact::ClampGrowthLimit(size_t new_capacity) {
  // From-space is the same size as moving-space in virtual memory.
  // However, if it's in the >4GB address space, then we don't need to clamp it
  // synchronously.
#if defined(__LP64__)
constexpr bool kClampFromSpace = kObjPtrPoisoning;
#else
constexpr bool kClampFromSpace = true;
#endif
size_t old_capacity = bump_pointer_space_->Capacity();
new_capacity = bump_pointer_space_->ClampGrowthLimit(new_capacity);
if (new_capacity < old_capacity) {
CHECK(from_space_map_.IsValid());
if (kClampFromSpace) {
from_space_map_.SetSize(new_capacity);
}
clamp_info_map_status_ = ClampInfoStatus::kClampInfoPending;
}
CHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
}
void MarkCompact::MaybeClampGcStructures() {
size_t moving_space_size = bump_pointer_space_->Capacity();
DCHECK(thread_running_gc_ != nullptr);
if (UNLIKELY(clamp_info_map_status_ == ClampInfoStatus::kClampInfoPending)) {
CHECK(from_space_map_.IsValid());
if (from_space_map_.Size() > moving_space_size) {
from_space_map_.SetSize(moving_space_size);
}
// Bitmaps and other data structures
live_words_bitmap_->SetBitmapSize(moving_space_size);
size_t set_size = InitializeInfoMap(info_map_.Begin(), moving_space_size);
CHECK_LT(set_size, info_map_.Size());
info_map_.SetSize(set_size);
clamp_info_map_status_ = ClampInfoStatus::kClampInfoFinished;
}
}
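// Prepares card-tables and mark bitmaps for marking. Invoked twice per GC cycle: once
// with 'pre_marking' set before concurrent marking starts, and once for the second round
// of marking. Immune (image/zygote) spaces have their mod-union tables processed or
// their cards aged, while the spaces being collected have their cards cleared or aged
// depending on whether this is a young or a full GC.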
void MarkCompact::PrepareForMarking(bool pre_marking) {
static_assert(gc::accounting::CardTable::kCardDirty - 1 == gc::accounting::CardTable::kCardAged);
static_assert(gc::accounting::CardTable::kCardAged - 1 == gc::accounting::CardTable::kCardAged2);
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
accounting::CardTable* const card_table = heap_->GetCardTable();
// immune_spaces_ is emptied in InitializePhase() before marking starts. This
// function is invoked twice during marking. We only need to populate immune_spaces_
// once per GC cycle. And when it's done (below), all the immune spaces are
// added to it. We can never have partially filled immune_spaces_.
bool update_immune_spaces = immune_spaces_.IsEmpty();
// Mark all of the spaces we never collect as immune.
for (const auto& space : GetHeap()->GetContinuousSpaces()) {
if (space->GetGcRetentionPolicy() == space::kGcRetentionPolicyNeverCollect ||
space->GetGcRetentionPolicy() == space::kGcRetentionPolicyFullCollect) {
CHECK(space->IsZygoteSpace() || space->IsImageSpace());
if (update_immune_spaces) {
immune_spaces_.AddSpace(space);
}
accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
if (table != nullptr) {
table->ProcessCards();
} else {
// Keep cards aged if we don't have a mod-union table since we need
// to scan them in future GCs. This case is for app images.
card_table->ModifyCardsAtomic(
space->Begin(),
space->End(),
[](uint8_t card) {
return (card == gc::accounting::CardTable::kCardClean)
? card
: gc::accounting::CardTable::kCardAged;
},
/* card modified visitor */ VoidFunctor());
}
} else if (pre_marking) {
CHECK(!space->IsZygoteSpace());
CHECK(!space->IsImageSpace());
if (young_gen_) {
uint8_t* space_age_end = space->Limit();
// Age cards in old-gen as they contain old-to-young references.
if (space == bump_pointer_space_) {
DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize);
moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(old_gen_end_),
reinterpret_cast<mirror::Object*>(moving_space_end_));
// Clear cards in [old_gen_end_, moving_space_end_) as they are not needed.
card_table->ClearCardRange(old_gen_end_, space->Limit());
space_age_end = old_gen_end_;
}
card_table->ModifyCardsAtomic(space->Begin(),
space_age_end,
AgeCardVisitor(),
/*card modified visitor=*/VoidFunctor());
} else {
// The card-table corresponding to bump-pointer and non-moving space can
// be cleared, because we are going to traverse all the reachable objects
// in these spaces. This card-table will eventually be used to track
// mutations while concurrent marking is going on.
card_table->ClearCardRange(space->Begin(), space->Limit());
if (space == bump_pointer_space_) {
moving_space_bitmap_->Clear();
}
}
if (space != bump_pointer_space_) {
CHECK_EQ(space, heap_->GetNonMovingSpace());
if (young_gen_) {
space->AsContinuousMemMapAllocSpace()->BindLiveToMarkBitmap();
}
non_moving_space_ = space;
non_moving_space_bitmap_ = space->GetMarkBitmap();
}
} else {
if (young_gen_) {
        // It would be correct to retain existing aged cards and add dirty cards
        // to that set. However, that would unnecessarily require us to re-scan
        // cards which haven't been dirtied since the first pass of marking.
auto card_visitor = [](uint8_t card) {
return (card > gc::accounting::CardTable::kCardAged2)
? card - 1
: gc::accounting::CardTable::kCardClean;
};
card_table->ModifyCardsAtomic(
space->Begin(), space->End(), card_visitor, /*card modified visitor=*/VoidFunctor());
} else {
card_table->ModifyCardsAtomic(space->Begin(),
space->End(),
AgeCardVisitor(),
/*card modified visitor=*/VoidFunctor());
}
}
}
if (pre_marking && young_gen_) {
for (const auto& space : GetHeap()->GetDiscontinuousSpaces()) {
CHECK(space->IsLargeObjectSpace());
space->AsLargeObjectSpace()->CopyLiveToMarked();
}
}
}
void MarkCompact::MarkZygoteLargeObjects() {
Thread* self = thread_running_gc_;
DCHECK_EQ(self, Thread::Current());
space::LargeObjectSpace* const los = heap_->GetLargeObjectsSpace();
if (los != nullptr) {
// Pick the current live bitmap (mark bitmap if swapped).
accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
// Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
reinterpret_cast<uintptr_t>(range.second),
[mark_bitmap, los, self](mirror::Object* obj)
REQUIRES(Locks::heap_bitmap_lock_)
REQUIRES_SHARED(Locks::mutator_lock_) {
if (los->IsZygoteLargeObject(self, obj)) {
mark_bitmap->Set(obj);
}
});
}
}
void MarkCompact::InitializePhase() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
mark_stack_ = heap_->GetMarkStack();
CHECK(mark_stack_->IsEmpty());
immune_spaces_.Reset();
moving_first_objs_count_ = 0;
non_moving_first_objs_count_ = 0;
black_page_count_ = 0;
bytes_scanned_ = 0;
freed_objects_ = 0;
// The first buffer is used by gc-thread.
compaction_buffer_counter_.store(1, std::memory_order_relaxed);
black_allocations_begin_ = bump_pointer_space_->Limit();
DCHECK_EQ(moving_space_begin_, bump_pointer_space_->Begin());
from_space_slide_diff_ = from_space_begin_ - moving_space_begin_;
moving_space_end_ = bump_pointer_space_->Limit();
if (use_generational_ && !young_gen_) {
class_after_obj_map_.clear();
}
  // TODO: Would it suffice to read it once in the constructor, which is called
  // in the zygote process?
pointer_size_ = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
for (size_t i = 0; i < vector_length_; i++) {
DCHECK_EQ(chunk_info_vec_[i], 0u);
}
app_slow_path_start_time_ = 0;
}
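// Per-thread closure run at the compaction flip: visits the thread's roots, clears its
// interpreter cache, and adjusts its TLAB by the slide-distance of black allocations.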
class MarkCompact::ThreadFlipVisitor : public Closure {
public:
explicit ThreadFlipVisitor(MarkCompact* collector) : collector_(collector) {}
void Run(Thread* thread) override REQUIRES_SHARED(Locks::mutator_lock_) {
// Note: self is not necessarily equal to thread since thread may be suspended.
Thread* self = Thread::Current();
CHECK(thread == self || thread->GetState() != ThreadState::kRunnable)
<< thread->GetState() << " thread " << thread << " self " << self;
thread->VisitRoots(collector_, kVisitRootFlagAllRoots);
    // The interpreter cache is thread-local, so it needs to be swept either in a
    // flip or in a stop-the-world pause.
CHECK(collector_->compacting_);
thread->GetInterpreterCache()->Clear(thread);
thread->AdjustTlab(collector_->black_objs_slide_diff_);
}
private:
MarkCompact* const collector_;
};
class MarkCompact::FlipCallback : public Closure {
public:
explicit FlipCallback(MarkCompact* collector) : collector_(collector) {}
void Run([[maybe_unused]] Thread* thread) override REQUIRES(Locks::mutator_lock_) {
collector_->CompactionPause();
}
private:
MarkCompact* const collector_;
};
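// Drives one full GC cycle: initialization, concurrent marking, the marking pause,
// reclamation and compaction preparation, then, if there is anything to compact, the
// thread-flip with the compaction pause followed by concurrent compaction, and finally
// FinishPhase().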
void MarkCompact::RunPhases() {
Thread* self = Thread::Current();
thread_running_gc_ = self;
Runtime* runtime = Runtime::Current();
GetHeap()->PreGcVerification(this);
InitializePhase();
{
ReaderMutexLock mu(self, *Locks::mutator_lock_);
MarkingPhase();
}
{
// Marking pause
ScopedPause pause(this);
MarkingPause();
if (kIsDebugBuild) {
bump_pointer_space_->AssertAllThreadLocalBuffersAreRevoked();
}
}
bool perform_compaction;
{
ReaderMutexLock mu(self, *Locks::mutator_lock_);
ReclaimPhase();
perform_compaction = PrepareForCompaction();
}
if (perform_compaction) {
// Compaction pause
ThreadFlipVisitor visitor(this);
FlipCallback callback(this);
runtime->GetThreadList()->FlipThreadRoots(
&visitor, &callback, this, GetHeap()->GetGcPauseListener());
if (IsValidFd(uffd_)) {
ReaderMutexLock mu(self, *Locks::mutator_lock_);
CompactionPhase();
}
} else {
if (use_generational_) {
DCHECK_IMPLIES(post_compact_end_ != nullptr, post_compact_end_ == black_allocations_begin_);
}
post_compact_end_ = black_allocations_begin_;
}
FinishPhase(perform_compaction);
GetHeap()->PostGcVerification(this);
thread_running_gc_ = nullptr;
}
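// For each to-space page of the compacted moving space, records the first live object
// and the offset (in kAlignment units) in the live-words bitmap at which the page's
// contents begin, using the chunk-info vector and the live-words bitmap. Sets
// moving_first_objs_count_ to the number of such pages.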
void MarkCompact::InitMovingSpaceFirstObjects(size_t vec_len, size_t to_space_page_idx) {
uint32_t offset_in_chunk_word;
uint32_t offset;
mirror::Object* obj;
const uintptr_t heap_begin = moving_space_bitmap_->HeapBegin();
// Find the first live word.
size_t chunk_idx = to_space_page_idx * (gPageSize / kOffsetChunkSize);
DCHECK_LT(chunk_idx, vec_len);
// Find the first live word in the space
for (; chunk_info_vec_[chunk_idx] == 0; chunk_idx++) {
if (chunk_idx >= vec_len) {
// We don't have any live data on the moving-space.
moving_first_objs_count_ = to_space_page_idx;
return;
}
}
DCHECK_LT(chunk_idx, vec_len);
// Use live-words bitmap to find the first live word
offset_in_chunk_word = live_words_bitmap_->FindNthLiveWordOffset(chunk_idx, /*n*/ 0);
offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
DCHECK(live_words_bitmap_->Test(offset)) << "offset=" << offset
<< " chunk_idx=" << chunk_idx
<< " N=0"
<< " offset_in_word=" << offset_in_chunk_word
<< " word=" << std::hex
<< live_words_bitmap_->GetWord(chunk_idx);
obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
// TODO: add a check to validate the object.
pre_compact_offset_moving_space_[to_space_page_idx] = offset;
first_objs_moving_space_[to_space_page_idx].Assign(obj);
to_space_page_idx++;
uint32_t page_live_bytes = 0;
while (true) {
for (; page_live_bytes <= gPageSize; chunk_idx++) {
if (chunk_idx >= vec_len) {
moving_first_objs_count_ = to_space_page_idx;
return;
}
page_live_bytes += chunk_info_vec_[chunk_idx];
}
chunk_idx--;
page_live_bytes -= gPageSize;
DCHECK_LE(page_live_bytes, kOffsetChunkSize);
DCHECK_LE(page_live_bytes, chunk_info_vec_[chunk_idx])
<< " chunk_idx=" << chunk_idx
<< " to_space_page_idx=" << to_space_page_idx
<< " vec_len=" << vec_len;
DCHECK(IsAligned<kAlignment>(chunk_info_vec_[chunk_idx] - page_live_bytes));
offset_in_chunk_word =
live_words_bitmap_->FindNthLiveWordOffset(
chunk_idx, (chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment);
offset = chunk_idx * kBitsPerVectorWord + offset_in_chunk_word;
DCHECK(live_words_bitmap_->Test(offset))
<< "offset=" << offset
<< " chunk_idx=" << chunk_idx
<< " N=" << ((chunk_info_vec_[chunk_idx] - page_live_bytes) / kAlignment)
<< " offset_in_word=" << offset_in_chunk_word
<< " word=" << std::hex << live_words_bitmap_->GetWord(chunk_idx);
// TODO: Can we optimize this for large objects? If we are continuing a
// large object that spans multiple pages, then we may be able to do without
// calling FindPrecedingObject().
//
// Find the object which encapsulates offset in it, which could be
// starting at offset itself.
obj = moving_space_bitmap_->FindPrecedingObject(heap_begin + offset * kAlignment);
// TODO: add a check to validate the object.
pre_compact_offset_moving_space_[to_space_page_idx] = offset;
first_objs_moving_space_[to_space_page_idx].Assign(obj);
to_space_page_idx++;
chunk_idx++;
}
}
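// For each page in [begin, end), records the object from which reference updates on that
// page must start: either an object spilling over from a preceding page, or the first
// live object starting within the page. Pages with no live objects keep a null entry.
// Returns the index one past the last page processed, or 0 if the range has no live
// objects.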
size_t MarkCompact::InitNonMovingFirstObjects(uintptr_t begin,
uintptr_t end,
accounting::ContinuousSpaceBitmap* bitmap,
ObjReference* first_objs_arr) {
mirror::Object* prev_obj;
size_t page_idx;
{
// Find first live object
mirror::Object* obj = nullptr;
bitmap->VisitMarkedRange</*kVisitOnce*/ true>(begin,
end,
[&obj] (mirror::Object* o) {
obj = o;
});
if (obj == nullptr) {
// There are no live objects in the space
return 0;
}
page_idx = DivideByPageSize(reinterpret_cast<uintptr_t>(obj) - begin);
first_objs_arr[page_idx++].Assign(obj);
prev_obj = obj;
}
// TODO: check obj is valid
uintptr_t prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
+ RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
// For every page find the object starting from which we need to call
// VisitReferences. It could either be an object that started on some
// preceding page, or some object starting within this page.
begin = RoundDown(reinterpret_cast<uintptr_t>(prev_obj) + gPageSize, gPageSize);
while (begin < end) {
// Utilize, if any, large object that started in some preceding page, but
// overlaps with this page as well.
if (prev_obj != nullptr && prev_obj_end > begin) {
DCHECK_LT(prev_obj, reinterpret_cast<mirror::Object*>(begin));
first_objs_arr[page_idx].Assign(prev_obj);
} else {
prev_obj_end = 0;
      // It's sufficient to only search for the previous object in the preceding page.
      // If no live object started in that page, and some object that started in the
      // page before it was big enough to overlap with the current page, then we
      // wouldn't be in this else branch.
prev_obj = bitmap->FindPrecedingObject(begin, begin - gPageSize);
if (prev_obj != nullptr) {
prev_obj_end = reinterpret_cast<uintptr_t>(prev_obj)
+ RoundUp(prev_obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
}
if (prev_obj_end > begin) {
first_objs_arr[page_idx].Assign(prev_obj);
} else {
// Find the first live object in this page
bitmap->VisitMarkedRange</*kVisitOnce*/ true>(
begin, begin + gPageSize, [first_objs_arr, page_idx](mirror::Object* obj) {
first_objs_arr[page_idx].Assign(obj);
});
}
// An empty entry indicates that the page has no live objects and hence
// can be skipped.
}
begin += gPageSize;
page_idx++;
}
return page_idx;
}
// Generational CMC description
// ============================
//
// All allocations since the last GC are considered to be in the young generation.
// Unlike other ART GCs, we promote surviving objects to old generation after
// they survive two consecutive GCs. Objects that survive one GC are considered
// to be in mid generation. In the next young GC, marking is performed on both
// the young as well as mid gen objects. And then during compaction, the
// surviving mid-gen objects are compacted and then promoted to old-gen, while
// the surviving young gen objects are compacted and promoted to mid-gen.
//
// Some other important points worth explaining:
//
// 1. During marking-phase, 'mid_gen_end_' segregates young and mid generations.
// Before starting compaction, in PrepareForCompaction(), we set it to the
// corresponding post-compact address, aligned up to page-size. Therefore,
// an object's beginning portion may be in mid-gen, while the rest is in young-gen.
// Aligning up is essential as mid_gen_end_ becomes old_gen_end_ at the end of
// the GC cycle, and the latter has to be page-aligned as old-gen pages are
// processed differently (no compaction).
//
// 2. We need to maintain the mark-bitmap for the old-gen for subsequent GCs. When
// objects are promoted to old-gen from mid-gen, their mark bits are first
// collected in a BitVector and later copied into the mark-bitmap in
// FinishPhase(). We can't directly set the bits in the mark-bitmap, as the bitmap
// contains pre-compaction mark bits which are required during compaction.
//
// 3. Since we need to revisit mid-gen objects in the next GC cycle, we need to
// dirty the cards in old-gen containing references to them. We identify these
// references when visiting old-gen objects during compaction. However, native
// roots are skipped at that time (they are updated separately in the linear-alloc
// space, where we don't know which object (dex-cache/class-loader/class) a native
// root belongs to). Therefore, native roots are covered during the marking
// phase.
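//
// Computes everything compaction needs: the black-dense region (which is updated in
// place rather than compacted), the first-object arrays, the exclusive prefix-sums of
// the chunk-info vector used for post-compact address computation, the post-compact ends
// of the moving space and of the mid generation, and the userfaultfd if not yet
// initialized. Returns false if there is nothing to compact.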
bool MarkCompact::PrepareForCompaction() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
size_t chunk_info_per_page = gPageSize / kOffsetChunkSize;
size_t vector_len = (black_allocations_begin_ - moving_space_begin_) / kOffsetChunkSize;
DCHECK_LE(vector_len, vector_length_);
DCHECK_ALIGNED_PARAM(vector_length_, chunk_info_per_page);
if (UNLIKELY(vector_len == 0)) {
// Nothing to compact. Entire heap is empty.
black_dense_end_ = mid_gen_end_ = moving_space_begin_;
return false;
}
for (size_t i = 0; i < vector_len; i++) {
DCHECK_LE(chunk_info_vec_[i], kOffsetChunkSize)
<< "i:" << i << " vector_length:" << vector_len << " vector_length_:" << vector_length_;
DCHECK_EQ(chunk_info_vec_[i], live_words_bitmap_->LiveBytesInBitmapWord(i));
}
// TODO: We can do a lot of neat tricks with this offset vector to tune the
// compaction as we wish. Originally, the compaction algorithm slides all
// live objects towards the beginning of the heap. This is nice because it
// keeps the spatial locality of objects intact.
// However, sometimes it's desired to compact objects in certain portions
// of the heap. For instance, it is expected that, over time,
// objects towards the beginning of the heap are long lived and are always
// densely packed. In this case, it makes sense to only update references in
// there and not try to compact it.
// Furthermore, we might have some large objects and may not want to move such
// objects.
// We can adjust, without too much effort, the values in the chunk_info_vec_ such
// that the objects in the dense beginning area aren't moved. OTOH, large
// objects, which could be anywhere in the heap, could also be kept from
// moving by using a similar trick. The only issue is that by doing this we will
// leave an unused hole in the middle of the heap which can't be used for
// allocations until we do a *full* compaction.
//
// At this point every element in the chunk_info_vec_ contains the live-bytes
// of the corresponding chunk. For old-to-new address computation we need
// every element to reflect total live-bytes till the corresponding chunk.
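  // For example, if the chunks after black_dense_idx hold live-bytes {64, 0, 128, 32},
  // the std::exclusive_scan() below turns them into {B, B + 64, B + 64, B + 192}, where
  // B = black_dense_idx * kOffsetChunkSize is the byte offset of the black-dense
  // region's end within the moving space.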
size_t black_dense_idx = 0;
GcCause gc_cause = GetCurrentIteration()->GetGcCause();
if (young_gen_) {
DCHECK_ALIGNED_PARAM(old_gen_end_, gPageSize);
DCHECK_GE(mid_gen_end_, old_gen_end_);
DCHECK_GE(black_allocations_begin_, mid_gen_end_);
// old-gen's boundary was decided at the end of previous GC-cycle.
black_dense_idx = (old_gen_end_ - moving_space_begin_) / kOffsetChunkSize;
if (black_dense_idx == vector_len) {
// There is nothing live in young-gen.
DCHECK_EQ(old_gen_end_, black_allocations_begin_);
mid_gen_end_ = black_allocations_begin_;
return false;
}
InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
reinterpret_cast<uintptr_t>(old_gen_end_),
moving_space_bitmap_,
first_objs_moving_space_);
} else if (gc_cause != kGcCauseExplicit && gc_cause != kGcCauseCollectorTransition &&
!GetCurrentIteration()->GetClearSoftReferences()) {
uint64_t live_bytes = 0, total_bytes = 0;
size_t aligned_vec_len = RoundUp(vector_len, chunk_info_per_page);
size_t num_pages = aligned_vec_len / chunk_info_per_page;
size_t threshold_passing_marker = 0; // In number of pages
std::vector<uint32_t> pages_live_bytes;
pages_live_bytes.reserve(num_pages);
// Identify the largest chunk towards the beginning of moving space which
// passes the black-dense threshold.
for (size_t i = 0; i < aligned_vec_len; i += chunk_info_per_page) {
uint32_t page_live_bytes = 0;
for (size_t j = 0; j < chunk_info_per_page; j++) {
page_live_bytes += chunk_info_vec_[i + j];
total_bytes += kOffsetChunkSize;
}
live_bytes += page_live_bytes;
pages_live_bytes.push_back(page_live_bytes);
if (live_bytes * 100U >= total_bytes * kBlackDenseRegionThreshold) {
threshold_passing_marker = pages_live_bytes.size();
}
}
DCHECK_EQ(pages_live_bytes.size(), num_pages);
// Eliminate the pages at the end of the chunk which are lower than the threshold.
if (threshold_passing_marker > 0) {
auto iter = std::find_if(
pages_live_bytes.rbegin() + (num_pages - threshold_passing_marker),
pages_live_bytes.rend(),
[](uint32_t bytes) { return bytes * 100U >= gPageSize * kBlackDenseRegionThreshold; });
black_dense_idx = (pages_live_bytes.rend() - iter) * chunk_info_per_page;
}
black_dense_end_ = moving_space_begin_ + black_dense_idx * kOffsetChunkSize;
DCHECK_ALIGNED_PARAM(black_dense_end_, gPageSize);
    // Adjust for classes allocated after black_dense_end_ while their object(s)
    // are earlier. This is required as we update the references in the
    // black-dense region in-place. If the class pointer of the first object of a
    // page, which started in some preceding page, has already been updated, then
    // we will read wrong class data, such as the ref-offset bitmap.
for (auto iter = class_after_obj_map_.rbegin();
iter != class_after_obj_map_.rend() &&
reinterpret_cast<uint8_t*>(iter->first.AsMirrorPtr()) >= black_dense_end_;
iter++) {
black_dense_end_ =
std::min(black_dense_end_, reinterpret_cast<uint8_t*>(iter->second.AsMirrorPtr()));
black_dense_end_ = AlignDown(black_dense_end_, gPageSize);
}
black_dense_idx = (black_dense_end_ - moving_space_begin_) / kOffsetChunkSize;
DCHECK_LE(black_dense_idx, vector_len);
if (black_dense_idx == vector_len) {
// There is nothing to compact. All the in-use pages are completely full.
mid_gen_end_ = black_allocations_begin_;
return false;
}
InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(moving_space_begin_),
reinterpret_cast<uintptr_t>(black_dense_end_),
moving_space_bitmap_,
first_objs_moving_space_);
} else {
black_dense_end_ = moving_space_begin_;
}
InitMovingSpaceFirstObjects(vector_len, black_dense_idx / chunk_info_per_page);
non_moving_first_objs_count_ =
InitNonMovingFirstObjects(reinterpret_cast<uintptr_t>(non_moving_space_->Begin()),
reinterpret_cast<uintptr_t>(non_moving_space_->End()),
non_moving_space_bitmap_,
first_objs_non_moving_space_);
// Update the vector one past the heap usage as it is required for black
// allocated objects' post-compact address computation.
uint32_t total_bytes;
if (vector_len < vector_length_) {
vector_len++;
total_bytes = 0;
} else {
// Fetch the value stored in the last element before it gets overwritten by
// std::exclusive_scan().
total_bytes = chunk_info_vec_[vector_len - 1];
}
std::exclusive_scan(chunk_info_vec_ + black_dense_idx,
chunk_info_vec_ + vector_len,
chunk_info_vec_ + black_dense_idx,
black_dense_idx * kOffsetChunkSize);
total_bytes += chunk_info_vec_[vector_len - 1];
post_compact_end_ = AlignUp(moving_space_begin_ + total_bytes, gPageSize);
CHECK_EQ(post_compact_end_, moving_space_begin_ + moving_first_objs_count_ * gPageSize)
<< "moving_first_objs_count_:" << moving_first_objs_count_
<< " black_dense_idx:" << black_dense_idx << " vector_len:" << vector_len
<< " total_bytes:" << total_bytes
<< " black_dense_end:" << reinterpret_cast<void*>(black_dense_end_)
<< " chunk_info_per_page:" << chunk_info_per_page;
black_objs_slide_diff_ = black_allocations_begin_ - post_compact_end_;
// We shouldn't be consuming more space after compaction than pre-compaction.
CHECK_GE(black_objs_slide_diff_, 0);
for (size_t i = vector_len; i < vector_length_; i++) {
DCHECK_EQ(chunk_info_vec_[i], 0u);
}
if (black_objs_slide_diff_ == 0) {
// Regardless of the gc-type, there are no pages to be compacted. Ensure
// that we don't shrink the mid-gen, which will become old-gen in
// FinishPhase(), thereby possibly moving some objects back to young-gen,
// which can cause memory corruption due to missing card marks.
mid_gen_end_ = std::max(mid_gen_end_, black_dense_end_);
mid_gen_end_ = std::min(mid_gen_end_, post_compact_end_);
return false;
}
if (use_generational_) {
// Current value of mid_gen_end_ represents end of 'pre-compacted' mid-gen,
// which was done at the end of previous GC. Compute, 'post-compacted' end of
// mid-gen, which will be consumed by old-gen at the end of this GC cycle.
DCHECK_NE(mid_gen_end_, nullptr);
mirror::Object* first_obj = nullptr;
if (mid_gen_end_ < black_allocations_begin_) {
ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
// Find the first live object in the young-gen.
moving_space_bitmap_->VisitMarkedRange</*kVisitOnce=*/true>(
reinterpret_cast<uintptr_t>(mid_gen_end_),
reinterpret_cast<uintptr_t>(black_allocations_begin_),
[&first_obj](mirror::Object* obj) { first_obj = obj; });
}
if (first_obj != nullptr) {
mirror::Object* compacted_obj;
if (reinterpret_cast<uint8_t*>(first_obj) >= old_gen_end_) {
// post-compact address of the first live object in young-gen.
compacted_obj = PostCompactOldObjAddr(first_obj);
DCHECK_LT(reinterpret_cast<uint8_t*>(compacted_obj), post_compact_end_);
} else {
DCHECK(!young_gen_);
compacted_obj = first_obj;
}
      // It's important to page-align the mid-gen boundary. However, that means
      // there could be an object overlapping that boundary. We will deal with
      // the consequences of that at different places. Aligning up is important
      // to ensure that we don't de-promote an object from old-gen back to
      // young-gen. Otherwise, we may skip dirtying the card for such an object if
      // it contains native-roots to young-gen.
mid_gen_end_ = AlignUp(reinterpret_cast<uint8_t*>(compacted_obj), gPageSize);
// We need to ensure that for any object in old-gen, its class is also in
// there (for the same reason as mentioned above in the black-dense case).
// So adjust mid_gen_end_ accordingly, in the worst case all the way up
// to post_compact_end_.
auto iter = class_after_obj_map_.lower_bound(ObjReference::FromMirrorPtr(first_obj));
for (; iter != class_after_obj_map_.end(); iter++) {
// 'mid_gen_end_' is now post-compact, so need to compare with
// post-compact addresses.
compacted_obj =
PostCompactAddress(iter->second.AsMirrorPtr(), old_gen_end_, moving_space_end_);
// We cannot update the map with post-compact addresses yet as compaction-phase
// expects pre-compacted addresses. So we will update in FinishPhase().
if (reinterpret_cast<uint8_t*>(compacted_obj) < mid_gen_end_) {
mirror::Object* klass = iter->first.AsMirrorPtr();
DCHECK_LT(reinterpret_cast<uint8_t*>(klass), black_allocations_begin_);
klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_);
// We only need to make sure that the class object doesn't move during
// compaction, which can be ensured by just making its first word be
// consumed in to the old-gen.
mid_gen_end_ =
std::max(mid_gen_end_, reinterpret_cast<uint8_t*>(klass) + kObjectAlignment);
mid_gen_end_ = AlignUp(mid_gen_end_, gPageSize);
}
}
CHECK_LE(mid_gen_end_, post_compact_end_);
} else {
// Young-gen is empty.
mid_gen_end_ = post_compact_end_;
}
DCHECK_LE(mid_gen_end_, post_compact_end_);
// We need this temporary bitmap only when running in generational mode.
if (old_gen_end_ < mid_gen_end_) {
mid_to_old_promo_bit_vec_.reset(
new BitVector((mid_gen_end_ - old_gen_end_) / kObjectAlignment,
/*expandable=*/false,
Allocator::GetCallocAllocator()));
}
}
// How do we handle compaction of heap portion used for allocations after the
// marking-pause?
// All allocations after the marking-pause are considered black (reachable)
// for this GC cycle. However, they need not be allocated contiguously as
// different mutators use TLABs. So we will compact the heap till the point
// where allocations took place before the marking-pause. And everything after
// that will be slid with TLAB holes, and then TLAB info in TLS will be
// appropriately updated in the pre-compaction pause.
// The chunk-info vector entries for the post marking-pause allocations will be
// also updated in the pre-compaction pause.
if (!uffd_initialized_) {
CreateUserfaultfd(/*post_fork=*/false);
}
return true;
}
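// Wraps a callable taking a mirror::Object* so that it can be passed to
// mirror::Object::VisitReferences(): field references, java.lang.ref.Reference objects
// and native GC roots are all forwarded to the wrapped callable.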
template <typename Visitor>
class MarkCompact::VisitReferencesVisitor {
public:
explicit VisitReferencesVisitor(Visitor visitor) : visitor_(visitor) {}
ALWAYS_INLINE void operator()(mirror::Object* obj,
MemberOffset offset,
[[maybe_unused]] bool is_static) const
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
visitor_(obj->GetFieldObject<mirror::Object>(offset));
}
ALWAYS_INLINE void operator()([[maybe_unused]] ObjPtr<mirror::Class> klass,
ObjPtr<mirror::Reference> ref) const
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
visitor_(ref.Ptr());
}
void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
if (!root->IsNull()) {
VisitRoot(root);
}
}
void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
visitor_(root->AsMirrorPtr());
}
private:
Visitor visitor_;
};
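// Debug verification, gated by kVerifyNoMissingCardMarks: any marked object holding a
// reference to an unmarked object must be on a dirty card.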
void MarkCompact::VerifyNoMissingCardMarks() {
if (kVerifyNoMissingCardMarks) {
accounting::CardTable* card_table = heap_->GetCardTable();
for (const auto& space : heap_->GetContinuousSpaces()) {
auto obj_visitor = [&](mirror::Object* obj) {
VisitReferencesVisitor ref_visitor([&](mirror::Object* ref)
REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
if (ref != nullptr && !IsMarked(ref)) {
CHECK(card_table->IsDirty(obj))
<< "obj:" << obj << " (" << obj->PrettyTypeOf() << ") ref:" << ref
<< " card:" << static_cast<int>(card_table->GetCard(obj))
<< " space:" << space->GetName()
<< " retention-policy:" << space->GetGcRetentionPolicy();
}
});
// We can't expect referent to hold the assertion.
obj->VisitReferences</*kVisitNativeRoots=*/true>(ref_visitor, VoidFunctor());
};
space->GetMarkBitmap()->VisitMarkedRange(reinterpret_cast<uintptr_t>(space->Begin()),
reinterpret_cast<uintptr_t>(space->End()),
obj_visitor);
}
}
}
class MarkCompact::VerifyRootMarkedVisitor : public SingleRootVisitor {
public:
explicit VerifyRootMarkedVisitor(MarkCompact* collector) : collector_(collector) { }
void VisitRoot(mirror::Object* root, const RootInfo& info) override
REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
CHECK(collector_->IsMarked(root) != nullptr) << info.ToString();
}
private:
MarkCompact* const collector_;
};
void MarkCompact::ReMarkRoots(Runtime* runtime) {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
DCHECK_EQ(thread_running_gc_, Thread::Current());
Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
MarkNonThreadRoots(runtime);
MarkConcurrentRoots(
static_cast<VisitRootFlags>(kVisitRootFlagNewRoots | kVisitRootFlagStopLoggingNewRoots |
kVisitRootFlagClearRootLog),
runtime);
if (kVerifyRootsMarked) {
TimingLogger::ScopedTiming t2("(Paused)VerifyRoots", GetTimings());
VerifyRootMarkedVisitor visitor(this);
runtime->VisitRoots(&visitor);
}
}
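// Runs with mutators suspended: revokes thread-local buffers and allocation stacks,
// drains the mark stack, re-marks roots, scans dirty cards, swaps the allocation stacks,
// disallows new system weaks and enables the reference-processor slow path. Allocations
// made after this pause are treated as black (live) for the rest of the cycle.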
void MarkCompact::MarkingPause() {
TimingLogger::ScopedTiming t("(Paused)MarkingPause", GetTimings());
Runtime* runtime = Runtime::Current();
Locks::mutator_lock_->AssertExclusiveHeld(thread_running_gc_);
{
// Handle the dirty objects as we are a concurrent GC
WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
VerifyNoMissingCardMarks();
{
MutexLock mu2(thread_running_gc_, *Locks::runtime_shutdown_lock_);
MutexLock mu3(thread_running_gc_, *Locks::thread_list_lock_);
std::list<Thread*> thread_list = runtime->GetThreadList()->GetList();
for (Thread* thread : thread_list) {
thread->VisitRoots(this, static_cast<VisitRootFlags>(0));
DCHECK_EQ(thread->GetThreadLocalGcBuffer(), nullptr);
// Need to revoke all the thread-local allocation stacks since we will
// swap the allocation stacks (below) and don't want anybody to allocate
// into the live stack.
thread->RevokeThreadLocalAllocationStack();
bump_pointer_space_->RevokeThreadLocalBuffers(thread);
}
}
ProcessMarkStack();
// Fetch only the accumulated objects-allocated count as it is guaranteed to
// be up-to-date after the TLAB revocation above.
freed_objects_ += bump_pointer_space_->GetAccumulatedObjectsAllocated();
// Capture 'end' of moving-space at this point. Every allocation beyond this
// point will be considered as black.
// Align-up to page boundary so that black allocations happen from next page
// onwards. Also, it ensures that 'end' is aligned for card-table's
// ClearCardRange().
black_allocations_begin_ = bump_pointer_space_->AlignEnd(thread_running_gc_, gPageSize, heap_);
DCHECK_ALIGNED_PARAM(black_allocations_begin_, gPageSize);
// Re-mark root set. Doesn't include thread-roots as they are already marked
// above.
ReMarkRoots(runtime);
// Scan dirty objects.
RecursiveMarkDirtyObjects(/*paused*/ true, accounting::CardTable::kCardDirty);
{
TimingLogger::ScopedTiming t2("SwapStacks", GetTimings());
heap_->SwapStacks();
live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
}
}
// TODO: For PreSweepingGcVerification(), find the correct strategy to visit/walk
// objects in bump-pointer space when we have a mark-bitmap to indicate live
// objects. At the same time we also need to be able to visit black allocations,
// even though they are not marked in the bitmap. Without both of these we fail
// pre-sweeping verification. We also leave windows open wherein a
// VisitObjects/Walk on the space would either miss some objects or visit
// unreachable ones. These windows exist while we are switching from the shared
// mutator-lock to exclusive and vice-versa, starting from here until the compaction pause.
// heap_->PreSweepingGcVerification(this);
// Disallow new system weaks to prevent a race which occurs when someone adds
// a new system weak before we sweep them. Since this new system weak may not
// be marked, the GC may incorrectly sweep it. This also fixes a race where
// interning may attempt to return a strong reference to a string that is
// about to be swept.
runtime->DisallowNewSystemWeaks();
// Enable the reference processing slow path, needs to be done with mutators
// paused since there is no lock in the GetReferent fast path.
heap_->GetReferenceProcessor()->EnableSlowPath();
marking_done_ = true;
}
void MarkCompact::SweepSystemWeaks(Thread* self, Runtime* runtime, const bool paused) {
TimingLogger::ScopedTiming t(paused ? "(Paused)SweepSystemWeaks" : "SweepSystemWeaks",
GetTimings());
ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
runtime->SweepSystemWeaks(this);
}
void MarkCompact::ProcessReferences(Thread* self) {
WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
GetHeap()->GetReferenceProcessor()->ProcessReferences(self, GetTimings());
}
void MarkCompact::SweepArray(accounting::ObjectStack* obj_arr, bool swap_bitmaps) {
TimingLogger::ScopedTiming t("SweepArray", GetTimings());
std::vector<space::ContinuousSpace*> sweep_spaces;
for (space::ContinuousSpace* space : heap_->GetContinuousSpaces()) {
if (!space->IsAllocSpace() || space == bump_pointer_space_ ||
immune_spaces_.ContainsSpace(space) || space->GetLiveBitmap() == nullptr) {
continue;
}
sweep_spaces.push_back(space);
}
GarbageCollector::SweepArray(obj_arr, swap_bitmaps, &sweep_spaces);
}
void MarkCompact::Sweep(bool swap_bitmaps) {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
if (young_gen_) {
// Only sweep objects on the live stack.
SweepArray(heap_->GetLiveStack(), /*swap_bitmaps=*/false);
} else {
// Ensure that nobody inserted objects in the live stack after we swapped the
// stacks.
CHECK_GE(live_stack_freeze_size_, GetHeap()->GetLiveStack()->Size());
{
TimingLogger::ScopedTiming t2("MarkAllocStackAsLive", GetTimings());
// Mark everything allocated since the last GC as live so that we can sweep
// concurrently, knowing that new allocations won't be marked as live.
accounting::ObjectStack* live_stack = heap_->GetLiveStack();
heap_->MarkAllocStackAsLive(live_stack);
live_stack->Reset();
DCHECK(mark_stack_->IsEmpty());
}
for (const auto& space : GetHeap()->GetContinuousSpaces()) {
if (space->IsContinuousMemMapAllocSpace() && space != bump_pointer_space_ &&
!immune_spaces_.ContainsSpace(space)) {
space::ContinuousMemMapAllocSpace* alloc_space = space->AsContinuousMemMapAllocSpace();
DCHECK(!alloc_space->IsZygoteSpace());
TimingLogger::ScopedTiming split("SweepMallocSpace", GetTimings());
RecordFree(alloc_space->Sweep(swap_bitmaps));
}
}
SweepLargeObjects(swap_bitmaps);
}
}
void MarkCompact::SweepLargeObjects(bool swap_bitmaps) {
space::LargeObjectSpace* los = heap_->GetLargeObjectsSpace();
if (los != nullptr) {
TimingLogger::ScopedTiming split(__FUNCTION__, GetTimings());
RecordFreeLOS(los->Sweep(swap_bitmaps));
}
}
void MarkCompact::ReclaimPhase() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
DCHECK(thread_running_gc_ == Thread::Current());
Runtime* const runtime = Runtime::Current();
// Process the references concurrently.
ProcessReferences(thread_running_gc_);
// TODO: Try to merge this system-weak sweeping with the one while updating
// references during the compaction pause.
SweepSystemWeaks(thread_running_gc_, runtime, /*paused*/ false);
runtime->AllowNewSystemWeaks();
// Clean up class loaders after system weaks are swept since that is how we know if class
// unloading occurred.
runtime->GetClassLinker()->CleanupClassLoaders();
{
WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
// Reclaim unmarked objects.
Sweep(false);
// Swap the live and mark bitmaps for each space that we modified. This is an
// optimization that lets us avoid clearing live bits inside the sweep. Only unbound
// bitmaps are swapped.
SwapBitmaps();
// Unbind the live and mark bitmaps.
GetHeap()->UnBindBitmaps();
}
// After sweeping and unbinding, we will need to use the non-moving space's
// live-bitmap instead of its mark-bitmap.
non_moving_space_bitmap_ = non_moving_space_->GetLiveBitmap();
}
// We want to avoid checking, for every reference, whether it's within the page
// or not. This can be done if we know where in the page the holder object lies.
// If it doesn't overlap either boundary then we can skip the checks.
//
// If kDirtyOldToMid = true, then check whether the object contains any references
// into the young-gen, which becomes the mid-gen after this GC. This is required
// as we mark and compact the mid-gen again in the next GC cycle, and hence the
// cards need to be dirtied. Note that black-allocations (the next young-gen)
// also have to be checked, because the pages are being compacted and hence the
// card corresponding to the compacted page needs to be dirtied.
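//
// A rough usage sketch (mirroring how the compaction code further below drives
// this visitor; 'ref' and 'post_compact_ref' are placeholder names here): the
// visitor is constructed per object, handed to VisitRefsForCompaction(), and
// then ShouldDirtyCard() tells the caller whether to dirty the post-compact
// object's card, e.g.
//   RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, /*kDirtyOldToMid*/ true>
//       visitor(this, ref, nullptr, nullptr, /*dirty_card=*/false);
//   ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
//   if (visitor.ShouldDirtyCard()) {
//     card_table->MarkCard(post_compact_ref);
//   }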
template <bool kCheckBegin, bool kCheckEnd, bool kDirtyOldToMid>
class MarkCompact::RefsUpdateVisitor {
public:
RefsUpdateVisitor(MarkCompact* collector,
mirror::Object* obj,
uint8_t* begin,
uint8_t* end,
accounting::CardTable* card_table = nullptr,
mirror::Object* card_obj = nullptr)
: RefsUpdateVisitor(collector, obj, begin, end, false) {
DCHECK(!kCheckBegin || begin != nullptr);
DCHECK(!kCheckEnd || end != nullptr);
// We can skip checking each reference for objects whose cards are already dirty.
if (kDirtyOldToMid && card_obj != nullptr) {
dirty_card_ = card_table->IsDirty(card_obj);
}
}
RefsUpdateVisitor(
MarkCompact* collector, mirror::Object* obj, uint8_t* begin, uint8_t* end, bool dirty_card)
: collector_(collector),
moving_space_begin_(collector->black_dense_end_),
moving_space_end_(collector->moving_space_end_),
young_gen_begin_(collector->mid_gen_end_),
obj_(obj),
begin_(begin),
end_(end),
dirty_card_(dirty_card) {}
bool ShouldDirtyCard() const { return dirty_card_; }
void operator()([[maybe_unused]] mirror::Object* old,
MemberOffset offset,
[[maybe_unused]] bool is_static) const ALWAYS_INLINE
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
bool update = true;
if (kCheckBegin || kCheckEnd) {
uint8_t* ref = reinterpret_cast<uint8_t*>(obj_) + offset.Int32Value();
update = (!kCheckBegin || ref >= begin_) && (!kCheckEnd || ref < end_);
}
if (update) {
mirror::Object* new_ref =
collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
CheckShouldDirtyCard(new_ref);
}
}
// For object arrays we don't need to check boundaries here as it's done in
// VisitReferences().
// TODO: Optimize reference updating using SIMD instructions. Object arrays
// are perfect as all references are tightly packed.
void operator()([[maybe_unused]] mirror::Object* old,
MemberOffset offset,
[[maybe_unused]] bool is_static,
[[maybe_unused]] bool is_obj_array) const ALWAYS_INLINE
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
mirror::Object* new_ref =
collector_->UpdateRef(obj_, offset, moving_space_begin_, moving_space_end_);
CheckShouldDirtyCard(new_ref);
}
void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
ALWAYS_INLINE
REQUIRES_SHARED(Locks::mutator_lock_) {
if (!root->IsNull()) {
VisitRoot(root);
}
}
void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
ALWAYS_INLINE
REQUIRES_SHARED(Locks::mutator_lock_) {
mirror::Object* new_ref = collector_->UpdateRoot(root, moving_space_begin_, moving_space_end_);
CheckShouldDirtyCard(new_ref);
}
private:
inline void CheckShouldDirtyCard(mirror::Object* ref) const {
if (kDirtyOldToMid && !dirty_card_) {
// moving_space_end_ is young-gen's end.
dirty_card_ = reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ &&
reinterpret_cast<uint8_t*>(ref) < moving_space_end_;
}
}
MarkCompact* const collector_;
uint8_t* const moving_space_begin_;
uint8_t* const moving_space_end_;
uint8_t* const young_gen_begin_;
mirror::Object* const obj_;
uint8_t* const begin_;
uint8_t* const end_;
mutable bool dirty_card_;
};
inline void MarkCompact::SetBitForMidToOldPromotion(uint8_t* obj) {
DCHECK(use_generational_);
DCHECK_GE(obj, old_gen_end_);
DCHECK_LT(obj, mid_gen_end_);
// This doesn't need to be atomic as every thread only sets bits in the
// bit_vector words corresponding to the page it is compacting.
mid_to_old_promo_bit_vec_->SetBit((obj - old_gen_end_) / kObjectAlignment);
}
bool MarkCompact::IsValidObject(mirror::Object* obj) const {
mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
if (!heap_->GetVerification()->IsValidHeapObjectAddress(klass)) {
return false;
}
return heap_->GetVerification()->IsValidClassUnchecked<kWithFromSpaceBarrier>(
obj->GetClass<kVerifyNone, kWithFromSpaceBarrier>());
}
template <typename Callback>
void MarkCompact::VerifyObject(mirror::Object* ref, Callback& callback) const {
if (kIsDebugBuild) {
mirror::Class* klass = ref->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
mirror::Class* pre_compact_klass = ref->GetClass<kVerifyNone, kWithoutReadBarrier>();
mirror::Class* klass_klass = klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
mirror::Class* klass_klass_klass = klass_klass->GetClass<kVerifyNone, kWithFromSpaceBarrier>();
if (HasAddress(pre_compact_klass) &&
reinterpret_cast<uint8_t*>(pre_compact_klass) < black_allocations_begin_) {
CHECK(moving_space_bitmap_->Test(pre_compact_klass))
<< "ref=" << ref
<< " post_compact_end=" << static_cast<void*>(post_compact_end_)
<< " pre_compact_klass=" << pre_compact_klass
<< " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
if (!young_gen_) {
CHECK(live_words_bitmap_->Test(pre_compact_klass));
}
}
if (!IsValidObject(ref)) {
std::ostringstream oss;
oss << "Invalid object: "
<< "ref=" << ref
<< " klass=" << klass
<< " klass_klass=" << klass_klass
<< " klass_klass_klass=" << klass_klass_klass
<< " pre_compact_klass=" << pre_compact_klass
<< " from_space_begin=" << static_cast<void*>(from_space_begin_)
<< " pre_compact_begin=" << static_cast<void*>(bump_pointer_space_->Begin())
<< " post_compact_end=" << static_cast<void*>(post_compact_end_)
<< " black_allocations_begin=" << static_cast<void*>(black_allocations_begin_);
// Call callback before dumping larger data like RAM and space dumps.
callback(oss);
oss << " \nobject="
<< heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(ref), 128)
<< " \nklass(from)="
<< heap_->GetVerification()->DumpRAMAroundAddress(reinterpret_cast<uintptr_t>(klass), 128)
<< "spaces:\n";
heap_->DumpSpaces(oss);
LOG(FATAL) << oss.str();
}
}
}
template <bool kSetupForGenerational>
void MarkCompact::CompactPage(mirror::Object* obj,
uint32_t offset,
uint8_t* addr,
uint8_t* to_space_addr,
bool needs_memset_zero) {
DCHECK_ALIGNED_PARAM(to_space_addr, gPageSize);
DCHECK(moving_space_bitmap_->Test(obj)
&& live_words_bitmap_->Test(obj));
DCHECK(live_words_bitmap_->Test(offset)) << "obj=" << obj
<< " offset=" << offset
<< " addr=" << static_cast<void*>(addr)
<< " black_allocs_begin="
<< static_cast<void*>(black_allocations_begin_)
<< " post_compact_addr="
<< static_cast<void*>(post_compact_end_);
accounting::CardTable* card_table = heap_->GetCardTable();
uint8_t* const start_addr = addr;
// We need to find the cards in the mid-gen (which is going to be consumed
// into the old-gen after this GC) that were dirtied between the marking pause
// and the compaction pause, and dirty the corresponding post-compact cards. We
// could have detected such reference fields while updating them in
// RefsUpdateVisitor, but that would not catch native-roots, hence we need to
// directly look at the pre-compact card-table.
// NOTE: we may get some false-positives if the same address in the post-compact
// heap has already been allocated as a TLAB and write-barriers have been
// invoked on it. But that is not harmful.
size_t cards_per_page = gPageSize >> accounting::CardTable::kCardShift;
size_t dest_cards = 0;
DCHECK(IsAligned<accounting::CardTable::kCardSize>(gPageSize));
static_assert(sizeof(dest_cards) * kBitsPerByte >=
kMaxPageSize / accounting::CardTable::kCardSize);
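// For example, with an illustrative 4 KiB page and 1 KiB cards there are 4 cards
// per page, and even a 16 KiB kMaxPageSize would need only 16 bits in
// 'dest_cards', comfortably within the static_assert above.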
// Number of distinct live-strides encountered on this page.
size_t stride_count = 0;
uint8_t* last_stride = addr;
uint32_t last_stride_begin = 0;
auto verify_obj_callback = [&](std::ostream& os) {
os << " stride_count=" << stride_count << " last_stride=" << static_cast<void*>(last_stride)
<< " offset=" << offset << " start_addr=" << static_cast<void*>(start_addr);
};
live_words_bitmap_->VisitLiveStrides(
offset,
black_allocations_begin_,
gPageSize,
[&](uint32_t stride_begin, size_t stride_size, [[maybe_unused]] bool is_last)
REQUIRES_SHARED(Locks::mutator_lock_) {
size_t stride_in_bytes = stride_size * kAlignment;
size_t stride_begin_bytes = stride_begin * kAlignment;
DCHECK_LE(stride_in_bytes, gPageSize);
last_stride_begin = stride_begin;
DCHECK(IsAligned<kAlignment>(addr));
memcpy(addr, from_space_begin_ + stride_begin_bytes, stride_in_bytes);
if (kIsDebugBuild) {
uint8_t* space_begin = bump_pointer_space_->Begin();
// We can interpret the first word of the stride as an
// obj only from second stride onwards, as the first
// stride's first-object may have started on previous
// page. The only exception is the first page of the
// moving space.
if (stride_count > 0 || stride_begin * kAlignment < gPageSize) {
mirror::Object* o =
reinterpret_cast<mirror::Object*>(space_begin + stride_begin * kAlignment);
CHECK(live_words_bitmap_->Test(o)) << "ref=" << o;
CHECK(moving_space_bitmap_->Test(o))
<< "ref=" << o << " bitmap: " << moving_space_bitmap_->DumpMemAround(o);
VerifyObject(reinterpret_cast<mirror::Object*>(addr), verify_obj_callback);
}
}
last_stride = addr;
stride_count++;
if (kSetupForGenerational) {
// Card idx within the gPageSize sized destination page.
size_t dest_card_idx = (addr - start_addr) >> accounting::CardTable::kCardShift;
DCHECK_LT(dest_card_idx, cards_per_page);
// Bytes remaining to fill in the current dest card.
size_t dest_bytes_remaining = accounting::CardTable::kCardSize -
(addr - start_addr) % accounting::CardTable::kCardSize;
// Update 'addr' for next stride before starting to modify
// 'stride_in_bytes' in the loops below.
addr += stride_in_bytes;
// Unconsumed bytes in the current src card.
size_t src_card_bytes = accounting::CardTable::kCardSize -
stride_begin_bytes % accounting::CardTable::kCardSize;
src_card_bytes = std::min(src_card_bytes, stride_in_bytes);
uint8_t* end_card = card_table->CardFromAddr(
moving_space_begin_ + stride_begin_bytes + stride_in_bytes - 1);
for (uint8_t* card =
card_table->CardFromAddr(moving_space_begin_ + stride_begin_bytes);
card <= end_card;
card++) {
if (*card == accounting::CardTable::kCardDirty) {
// If the current src card will contribute to the next dest
// card as well, then dirty the next one too.
size_t val = dest_bytes_remaining < src_card_bytes ? 3 : 1;
dest_cards |= val << dest_card_idx;
}
// Adjust destination card and its remaining bytes for next iteration.
if (dest_bytes_remaining <= src_card_bytes) {
dest_bytes_remaining =
accounting::CardTable::kCardSize - (src_card_bytes - dest_bytes_remaining);
dest_card_idx++;
} else {
dest_bytes_remaining -= src_card_bytes;
}
DCHECK_LE(dest_card_idx, cards_per_page);
stride_in_bytes -= src_card_bytes;
src_card_bytes = std::min(accounting::CardTable::kCardSize, stride_in_bytes);
}
} else {
addr += stride_in_bytes;
}
});
DCHECK_LT(last_stride, start_addr + gPageSize);
DCHECK_GT(stride_count, 0u);
size_t obj_size = 0;
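// 'offset' is in kAlignment-sized words from the moving-space begin, so
// offset * kAlignment is the pre-compact byte at which this page's contents
// start; subtracting the first object's own byte offset gives how far into that
// object the page begins (non-zero only if the object spills over from the
// previous page).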
uint32_t offset_within_obj =
offset * kAlignment - (reinterpret_cast<uint8_t*>(obj) - moving_space_begin_);
// First object
if (offset_within_obj > 0) {
bool should_dirty_card;
mirror::Object* to_ref = reinterpret_cast<mirror::Object*>(start_addr - offset_within_obj);
mirror::Object* from_obj = GetFromSpaceAddr(obj);
mirror::Object* post_compact_obj = nullptr;
if (kSetupForGenerational) {
post_compact_obj = PostCompactAddress(obj, black_dense_end_, moving_space_end_);
}
if (stride_count > 1) {
RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
this, to_ref, start_addr, nullptr, card_table, post_compact_obj);
obj_size =
from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>(
visitor, MemberOffset(offset_within_obj), MemberOffset(-1));
should_dirty_card = visitor.ShouldDirtyCard();
} else {
RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
this, to_ref, start_addr, start_addr + gPageSize, card_table, post_compact_obj);
obj_size =
from_obj->VisitRefsForCompaction</*kFetchObjSize*/ true, /*kVisitNativeRoots*/ false>(
visitor,
MemberOffset(offset_within_obj),
MemberOffset(offset_within_obj + gPageSize));
should_dirty_card = visitor.ShouldDirtyCard();
}
if (kSetupForGenerational && should_dirty_card) {
card_table->MarkCard(post_compact_obj);
}
obj_size = RoundUp(obj_size, kAlignment);
DCHECK_GT(obj_size, offset_within_obj)
<< "obj:" << obj
<< " class:" << from_obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
<< " to_addr:" << to_ref
<< " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
<< " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
<< " offset:" << offset * kAlignment << " class-after-obj-iter:"
<< (class_after_obj_iter_ != class_after_obj_map_.rend()
? class_after_obj_iter_->first.AsMirrorPtr()
: nullptr)
<< " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
<< " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
<< " offset-of-last-idx:"
<< pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
<< " first-obj-of-last-idx:"
<< first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
obj_size -= offset_within_obj;
// If there is only one stride, then adjust last_stride_begin to the
// end of the first object.
if (stride_count == 1) {
last_stride_begin += obj_size / kAlignment;
}
}
// Except for the last page being compacted, the pages will have addr ==
// start_addr + gPageSize.
uint8_t* const end_addr = addr;
addr = start_addr;
size_t bytes_done = obj_size;
// All strides except the last one can be updated without any boundary
// checks.
DCHECK_LE(addr, last_stride);
size_t bytes_to_visit = last_stride - addr;
DCHECK_LE(bytes_to_visit, gPageSize);
while (bytes_to_visit > bytes_done) {
mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
VerifyObject(ref, verify_obj_callback);
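// The bit in 'dest_cards' for the destination card covering this object records
// whether the corresponding pre-compact card was dirty; if so, the visitor can
// skip the per-reference young-gen checks and simply report the card as dirty.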
RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
this,
ref,
nullptr,
nullptr,
dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift)));
obj_size = ref->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
if (kSetupForGenerational) {
SetBitForMidToOldPromotion(to_space_addr + bytes_done);
if (visitor.ShouldDirtyCard()) {
card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done));
}
}
obj_size = RoundUp(obj_size, kAlignment);
bytes_done += obj_size;
}
// Last stride may have multiple objects in it and we don't know where the
// last object which crosses the page boundary starts, therefore check
// page-end in all of these objects. Also, we need to call
// VisitRefsForCompaction() with from-space object as we fetch object size,
// which in case of klass requires 'class_size_'.
uint8_t* from_addr = from_space_begin_ + last_stride_begin * kAlignment;
bytes_to_visit = end_addr - addr;
DCHECK_LE(bytes_to_visit, gPageSize);
while (bytes_to_visit > bytes_done) {
mirror::Object* ref = reinterpret_cast<mirror::Object*>(addr + bytes_done);
obj = reinterpret_cast<mirror::Object*>(from_addr);
VerifyObject(ref, verify_obj_callback);
RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
this,
ref,
nullptr,
start_addr + gPageSize,
dest_cards & (1 << (bytes_done >> accounting::CardTable::kCardShift)));
obj_size = obj->VisitRefsForCompaction(visitor,
MemberOffset(0),
MemberOffset(end_addr - (addr + bytes_done)));
if (kSetupForGenerational) {
SetBitForMidToOldPromotion(to_space_addr + bytes_done);
if (visitor.ShouldDirtyCard()) {
card_table->MarkCard(reinterpret_cast<mirror::Object*>(to_space_addr + bytes_done));
}
}
obj_size = RoundUp(obj_size, kAlignment);
DCHECK_GT(obj_size, 0u)
<< "from_addr:" << obj
<< " from-space-class:" << obj->GetClass<kDefaultVerifyFlags, kWithFromSpaceBarrier>()
<< " to_addr:" << ref
<< " black-allocation-begin:" << reinterpret_cast<void*>(black_allocations_begin_)
<< " post-compact-end:" << reinterpret_cast<void*>(post_compact_end_)
<< " offset:" << offset * kAlignment << " bytes_done:" << bytes_done
<< " class-after-obj-iter:"
<< (class_after_obj_iter_ != class_after_obj_map_.rend() ?
class_after_obj_iter_->first.AsMirrorPtr() :
nullptr)
<< " last-reclaimed-page:" << reinterpret_cast<void*>(last_reclaimed_page_)
<< " last-checked-reclaim-page-idx:" << last_checked_reclaim_page_idx_
<< " offset-of-last-idx:"
<< pre_compact_offset_moving_space_[last_checked_reclaim_page_idx_] * kAlignment
<< " first-obj-of-last-idx:"
<< first_objs_moving_space_[last_checked_reclaim_page_idx_].AsMirrorPtr();
from_addr += obj_size;
bytes_done += obj_size;
}
// The last page that we compact may have some bytes left untouched at the
// end; we should zero them as the kernel copies at page granularity.
if (needs_memset_zero && UNLIKELY(bytes_done < gPageSize)) {
std::memset(addr + bytes_done, 0x0, gPageSize - bytes_done);
}
}
// We store the starting point (pre_compact_page - first_obj) and first-chunk's
// size. If more TLAB(s) started in this page, then those chunks are identified
// using mark bitmap. All this info is prepared in UpdateMovingSpaceBlackAllocations().
// If we find a set bit in the bitmap, then we copy the remaining page and then
// use the bitmap to visit each object for updating references.
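// For instance, if a black page's first chunk covers only part of the page
// because a new TLAB started midway, the chunk is copied and updated as one
// unit, while the objects after it are located through their mark-bits and have
// their references updated one by one.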
void MarkCompact::SlideBlackPage(mirror::Object* first_obj,
mirror::Object* next_page_first_obj,
uint32_t first_chunk_size,
uint8_t* const pre_compact_page,
uint8_t* dest,
bool needs_memset_zero) {
DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
size_t bytes_copied;
uint8_t* src_addr = reinterpret_cast<uint8_t*>(GetFromSpaceAddr(first_obj));
uint8_t* pre_compact_addr = reinterpret_cast<uint8_t*>(first_obj);
uint8_t* const pre_compact_page_end = pre_compact_page + gPageSize;
uint8_t* const dest_page_end = dest + gPageSize;
auto verify_obj_callback = [&] (std::ostream& os) {
os << " first_obj=" << first_obj
<< " next_page_first_obj=" << next_page_first_obj
<< " first_chunk_sie=" << first_chunk_size
<< " dest=" << static_cast<void*>(dest)
<< " pre_compact_page="
<< static_cast<void* const>(pre_compact_page);
};
// We have an empty portion at the beginning of the page. Zero it.
if (pre_compact_addr > pre_compact_page) {
bytes_copied = pre_compact_addr - pre_compact_page;
DCHECK_LT(bytes_copied, gPageSize);
if (needs_memset_zero) {
std::memset(dest, 0x0, bytes_copied);
}
dest += bytes_copied;
} else {
bytes_copied = 0;
size_t offset = pre_compact_page - pre_compact_addr;
pre_compact_addr = pre_compact_page;
src_addr += offset;
DCHECK(IsAlignedParam(src_addr, gPageSize));
}
// Copy the first chunk of live words
std::memcpy(dest, src_addr, first_chunk_size);
// Update references in the first chunk. Use object size to find next object.
{
size_t bytes_to_visit = first_chunk_size;
size_t obj_size;
// The first object started in some previous page. So we need to check the
// beginning.
DCHECK_LE(reinterpret_cast<uint8_t*>(first_obj), pre_compact_addr);
size_t offset = pre_compact_addr - reinterpret_cast<uint8_t*>(first_obj);
if (bytes_copied == 0 && offset > 0) {
mirror::Object* to_obj = reinterpret_cast<mirror::Object*>(dest - offset);
mirror::Object* from_obj = reinterpret_cast<mirror::Object*>(src_addr - offset);
// If the next page's first-obj is within this page, or is nullptr, then we
// don't need to check the end boundary.
if (next_page_first_obj == nullptr
|| (first_obj != next_page_first_obj
&& reinterpret_cast<uint8_t*>(next_page_first_obj) <= pre_compact_page_end)) {
RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/false> visitor(this,
to_obj,
dest,
nullptr);
obj_size = from_obj->VisitRefsForCompaction<
/*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
MemberOffset(offset),
MemberOffset(-1));
} else {
RefsUpdateVisitor</*kCheckBegin*/true, /*kCheckEnd*/true> visitor(this,
to_obj,
dest,
dest_page_end);
obj_size = from_obj->VisitRefsForCompaction<
/*kFetchObjSize*/true, /*kVisitNativeRoots*/false>(visitor,
MemberOffset(offset),
MemberOffset(offset
+ gPageSize));
if (first_obj == next_page_first_obj) {
// First object is the only object on this page. So there's nothing else left to do.
return;
}
}
obj_size = RoundUp(obj_size, kAlignment);
obj_size -= offset;
dest += obj_size;
bytes_to_visit -= obj_size;
}
bytes_copied += first_chunk_size;
// If the last object in this page is next_page_first_obj, then we need to check end boundary
bool check_last_obj = false;
if (next_page_first_obj != nullptr
&& reinterpret_cast<uint8_t*>(next_page_first_obj) < pre_compact_page_end
&& bytes_copied == gPageSize) {
size_t diff = pre_compact_page_end - reinterpret_cast<uint8_t*>(next_page_first_obj);
DCHECK_LE(diff, gPageSize);
DCHECK_LE(diff, bytes_to_visit);
bytes_to_visit -= diff;
check_last_obj = true;
}
while (bytes_to_visit > 0) {
mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
VerifyObject(dest_obj, verify_obj_callback);
RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(this,
dest_obj,
nullptr,
nullptr);
obj_size = dest_obj->VisitRefsForCompaction(visitor, MemberOffset(0), MemberOffset(-1));
obj_size = RoundUp(obj_size, kAlignment);
bytes_to_visit -= obj_size;
dest += obj_size;
}
DCHECK_EQ(bytes_to_visit, 0u);
if (check_last_obj) {
mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest);
VerifyObject(dest_obj, verify_obj_callback);
RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/true> visitor(this,
dest_obj,
nullptr,
dest_page_end);
mirror::Object* obj = GetFromSpaceAddr(next_page_first_obj);
obj->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
MemberOffset(0),
MemberOffset(dest_page_end - dest));
return;
}
}
// Probably a TLAB finished on this page and/or a new TLAB started as well.
if (bytes_copied < gPageSize) {
src_addr += first_chunk_size;
pre_compact_addr += first_chunk_size;
// Use mark-bitmap to identify where objects are. First call
// VisitMarkedRange for only the first marked bit. If found, zero all bytes
// until that object and then call memcpy on the rest of the page.
// Then call VisitMarkedRange for all marked bits *after* the one found in
// this invocation. This time to visit references.
uintptr_t start_visit = reinterpret_cast<uintptr_t>(pre_compact_addr);
uintptr_t page_end = reinterpret_cast<uintptr_t>(pre_compact_page_end);
mirror::Object* found_obj = nullptr;
moving_space_bitmap_->VisitMarkedRange</*kVisitOnce*/true>(start_visit,
page_end,
[&found_obj](mirror::Object* obj) {
found_obj = obj;
});
size_t remaining_bytes = gPageSize - bytes_copied;
if (found_obj == nullptr) {
if (needs_memset_zero) {
// No more black objects in this page. Zero the remaining bytes and return.
std::memset(dest, 0x0, remaining_bytes);
}
return;
}
// Copy everything in this page, which includes any zeroed regions
// in-between.
std::memcpy(dest, src_addr, remaining_bytes);
DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
moving_space_bitmap_->VisitMarkedRange(
reinterpret_cast<uintptr_t>(found_obj) + mirror::kObjectHeaderSize,
page_end,
[&found_obj, pre_compact_addr, dest, this, verify_obj_callback] (mirror::Object* obj)
REQUIRES_SHARED(Locks::mutator_lock_) {
ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
mirror::Object* ref = reinterpret_cast<mirror::Object*>(dest + diff);
VerifyObject(ref, verify_obj_callback);
RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false>
visitor(this, ref, nullptr, nullptr);
ref->VisitRefsForCompaction</*kFetchObjSize*/false>(visitor,
MemberOffset(0),
MemberOffset(-1));
// Remember for next round.
found_obj = obj;
});
// found_obj may have been updated in VisitMarkedRange. Visit the last found
// object.
DCHECK_GT(reinterpret_cast<uint8_t*>(found_obj), pre_compact_addr);
DCHECK_LT(reinterpret_cast<uintptr_t>(found_obj), page_end);
ptrdiff_t diff = reinterpret_cast<uint8_t*>(found_obj) - pre_compact_addr;
mirror::Object* dest_obj = reinterpret_cast<mirror::Object*>(dest + diff);
VerifyObject(dest_obj, verify_obj_callback);
RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true> visitor(
this, dest_obj, nullptr, dest_page_end);
// Last object could overlap with next page. And if it happens to be a
// class, then we may access something (like static-fields' offsets) which
// is on the next page. Therefore, use from-space's reference.
mirror::Object* obj = GetFromSpaceAddr(found_obj);
obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
visitor, MemberOffset(0), MemberOffset(page_end - reinterpret_cast<uintptr_t>(found_obj)));
}
}
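// Back-off helper used below when retrying operations under contention: the
// first kYieldMax + 1 calls (i = 0..kYieldMax) just yield the CPU; beyond that
// the sleep grows linearly with i, e.g. with the defaults i = 6 sleeps ~10us,
// i = 7 ~20us, and so on.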
template <uint32_t kYieldMax = 5, uint64_t kSleepUs = 10>
static void BackOff(uint32_t i) {
// TODO: Consider adding x86 PAUSE and/or ARM YIELD here.
if (i <= kYieldMax) {
sched_yield();
} else {
// nanosleep is not in the async-signal-safe list, but bionic implements it
// with a pure system call, so it should be fine.
NanoSleep(kSleepUs * 1000 * (i - kYieldMax));
}
}
size_t MarkCompact::ZeropageIoctl(void* addr,
size_t length,
bool tolerate_eexist,
bool tolerate_enoent) {
int32_t backoff_count = -1;
int32_t max_backoff = 10; // max native priority.
struct uffdio_zeropage uffd_zeropage;
DCHECK(IsAlignedParam(addr, gPageSize));
uffd_zeropage.range.start = reinterpret_cast<uintptr_t>(addr);
uffd_zeropage.range.len = length;
uffd_zeropage.mode = gUffdSupportsMmapTrylock ? UFFDIO_ZEROPAGE_MODE_MMAP_TRYLOCK : 0;
while (true) {
uffd_zeropage.zeropage = 0;
int ret = ioctl(uffd_, UFFDIO_ZEROPAGE, &uffd_zeropage);
if (ret == 0) {
DCHECK_EQ(uffd_zeropage.zeropage, static_cast<ssize_t>(length));
return length;
} else if (errno == EAGAIN) {
if (uffd_zeropage.zeropage > 0) {
// Contention was observed after acquiring mmap_lock. But the first page
// is already done, which is what we care about.
DCHECK(IsAlignedParam(uffd_zeropage.zeropage, gPageSize));
DCHECK_GE(uffd_zeropage.zeropage, static_cast<ssize_t>(gPageSize));
return uffd_zeropage.zeropage;
} else if (uffd_zeropage.zeropage < 0) {
// mmap_read_trylock() failed due to contention. Back-off and retry.
DCHECK_EQ(uffd_zeropage.zeropage, -EAGAIN);
if (backoff_count == -1) {
int prio = Thread::Current()->GetNativePriority();
DCHECK(prio > 0 && prio <= 10) << prio;
max_backoff -= prio;
backoff_count = 0;
}
if (backoff_count < max_backoff) {
// Using 3 to align 'normal' priority threads with sleep.
BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
} else {
uffd_zeropage.mode = 0;
}
}
} else if (tolerate_eexist && errno == EEXIST) {
// Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
// wouldn't be included in it.
return uffd_zeropage.zeropage > 0 ? uffd_zeropage.zeropage + gPageSize : gPageSize;
} else {
CHECK(tolerate_enoent && errno == ENOENT)
<< "ioctl_userfaultfd: zeropage failed: " << strerror(errno) << ". addr:" << addr;
return 0;
}
}
}
size_t MarkCompact::CopyIoctl(
void* dst, void* buffer, size_t length, bool return_on_contention, bool tolerate_enoent) {
int32_t backoff_count = -1;
int32_t max_backoff = 10; // max native priority.
struct uffdio_copy uffd_copy;
uffd_copy.mode = gUffdSupportsMmapTrylock ? UFFDIO_COPY_MODE_MMAP_TRYLOCK : 0;
uffd_copy.src = reinterpret_cast<uintptr_t>(buffer);
uffd_copy.dst = reinterpret_cast<uintptr_t>(dst);
uffd_copy.len = length;
uffd_copy.copy = 0;
while (true) {
int ret = ioctl(uffd_, UFFDIO_COPY, &uffd_copy);
if (ret == 0) {
DCHECK_EQ(uffd_copy.copy, static_cast<ssize_t>(length));
break;
} else if (errno == EAGAIN) {
// Contention observed.
DCHECK_NE(uffd_copy.copy, 0);
if (uffd_copy.copy > 0) {
// Contention was observed after acquiring mmap_lock.
DCHECK(IsAlignedParam(uffd_copy.copy, gPageSize));
DCHECK_GE(uffd_copy.copy, static_cast<ssize_t>(gPageSize));
break;
} else {
// mmap_read_trylock() failed due to contention.
DCHECK_EQ(uffd_copy.copy, -EAGAIN);
uffd_copy.copy = 0;
if (return_on_contention) {
break;
}
}
if (backoff_count == -1) {
int prio = Thread::Current()->GetNativePriority();
DCHECK(prio > 0 && prio <= 10) << prio;
max_backoff -= prio;
backoff_count = 0;
}
if (backoff_count < max_backoff) {
// Using 3 to align 'normal' priority threads with sleep.
BackOff</*kYieldMax=*/3, /*kSleepUs=*/1000>(backoff_count++);
} else {
uffd_copy.mode = 0;
}
} else if (errno == EEXIST) {
DCHECK_NE(uffd_copy.copy, 0);
if (uffd_copy.copy < 0) {
uffd_copy.copy = 0;
}
// Ioctl returns the number of bytes it mapped. The page on which EEXIST occurred
// wouldn't be included in it.
uffd_copy.copy += gPageSize;
break;
} else {
CHECK(tolerate_enoent && errno == ENOENT)
<< "ioctl_userfaultfd: copy failed: " << strerror(errno) << ". src:" << buffer
<< " dst:" << dst;
return uffd_copy.copy > 0 ? uffd_copy.copy : 0;
}
}
return uffd_copy.copy;
}
template <int kMode, typename CompactionFn>
bool MarkCompact::DoPageCompactionWithStateChange(size_t page_idx,
uint8_t* to_space_page,
uint8_t* page,
bool map_immediately,
CompactionFn func) {
uint32_t expected_state = static_cast<uint8_t>(PageState::kUnprocessed);
uint32_t desired_state = static_cast<uint8_t>(map_immediately ? PageState::kProcessingAndMapping :
PageState::kProcessing);
// In the concurrent case (kMode != kFallbackMode) we need to ensure that the update
// to moving_pages_status_[page_idx] is released before the contents of the page are
// made accessible to other threads.
//
// We need acquire ordering here to ensure that when the CAS fails, another thread
// has completed processing the page, which is guaranteed by the release below.
if (kMode == kFallbackMode || moving_pages_status_[page_idx].compare_exchange_strong(
expected_state, desired_state, std::memory_order_acquire)) {
func();
if (kMode == kCopyMode) {
if (map_immediately) {
CopyIoctl(to_space_page,
page,
gPageSize,
/*return_on_contention=*/false,
/*tolerate_enoent=*/false);
// Store is sufficient as no other thread could modify the status at this
// point. Relaxed order is sufficient as the ioctl will act as a fence.
moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
std::memory_order_relaxed);
} else {
// Pack the src page's from-space offset into the status word; since the offset is
// page aligned, its low bits are free to hold the page state.
DCHECK(from_space_map_.HasAddress(page));
DCHECK_LE(static_cast<size_t>(page - from_space_begin_),
std::numeric_limits<uint32_t>::max());
uint32_t store_val = page - from_space_begin_;
DCHECK_EQ(store_val & kPageStateMask, 0u);
store_val |= static_cast<uint8_t>(PageState::kProcessed);
// Store is sufficient as no other thread would modify the status at this point.
moving_pages_status_[page_idx].store(store_val, std::memory_order_release);
}
}
return true;
} else {
// Only GC thread could have set the state to Processed.
DCHECK_NE(expected_state, static_cast<uint8_t>(PageState::kProcessed));
return false;
}
}
bool MarkCompact::FreeFromSpacePages(size_t cur_page_idx, int mode, size_t end_idx_for_mapping) {
// Thanks to sliding compaction, bump-pointer allocations, and reverse
// compaction (see CompactMovingSpace) the logic here is pretty simple: find
// the to-space page up to which compaction has finished; all the from-space
// pages corresponding to it and onwards can be freed. There are some corner
// cases to be taken care of, which are described below.
size_t idx = last_checked_reclaim_page_idx_;
// Find the to-space page up to which the corresponding from-space pages can be
// freed.
for (; idx > cur_page_idx; idx--) {
PageState state = GetMovingPageState(idx - 1);
if (state == PageState::kMutatorProcessing) {
// Some mutator is working on the page.
break;
}
DCHECK(state >= PageState::kProcessed ||
(state == PageState::kUnprocessed &&
(mode == kFallbackMode || idx > moving_first_objs_count_)));
}
DCHECK_LE(idx, last_checked_reclaim_page_idx_);
if (idx == last_checked_reclaim_page_idx_) {
// Nothing to do.
return false;
}
uint8_t* reclaim_begin;
uint8_t* idx_addr;
// Calculate the first from-space page to be freed using 'idx'. If the
// first-object of the idx'th to-space page started before the corresponding
// from-space page, which is almost always the case in the compaction portion
// of the moving-space, then it indicates that the subsequent pages that are
// yet to be compacted will need the from-space pages. Therefore, find the page
// (from the already compacted pages) whose first-object is different from
// ours. All the from-space pages starting from that one are safe to be
// removed. Please note that this iteration is not expected to be long in
// normal cases as objects are smaller than page size.
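// For example, if pages idx, idx+1, and idx+2 all report the same first-object
// (a single object spanning them), the loops below advance reclaim_begin past
// that object, to the first compacted page whose first-object differs, before
// any from-space pages are freed.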
if (idx >= moving_first_objs_count_) {
// black-allocated portion of the moving-space
idx_addr = black_allocations_begin_ + (idx - moving_first_objs_count_) * gPageSize;
reclaim_begin = idx_addr;
mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
if (first_obj != nullptr && reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
size_t idx_len = moving_first_objs_count_ + black_page_count_;
for (size_t i = idx + 1; i < idx_len; i++) {
mirror::Object* obj = first_objs_moving_space_[i].AsMirrorPtr();
// A null first-object indicates that the corresponding to-space page is
// not used yet. So we can compute its from-space page and use that.
if (obj != first_obj) {
reclaim_begin = obj != nullptr
? AlignUp(reinterpret_cast<uint8_t*>(obj), gPageSize)
: (black_allocations_begin_ + (i - moving_first_objs_count_) * gPageSize);
break;
}
}
}
} else {
DCHECK_GE(pre_compact_offset_moving_space_[idx], 0u);
idx_addr = moving_space_begin_ + idx * gPageSize;
if (idx_addr >= black_dense_end_) {
idx_addr = moving_space_begin_ + pre_compact_offset_moving_space_[idx] * kAlignment;
}
reclaim_begin = idx_addr;
DCHECK_LE(reclaim_begin, black_allocations_begin_);
mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
if (first_obj != nullptr) {
if (reinterpret_cast<uint8_t*>(first_obj) < reclaim_begin) {
DCHECK_LT(idx, moving_first_objs_count_);
mirror::Object* obj = first_obj;
for (size_t i = idx + 1; i < moving_first_objs_count_; i++) {
obj = first_objs_moving_space_[i].AsMirrorPtr();
if (obj == nullptr) {
reclaim_begin = moving_space_begin_ + i * gPageSize;
break;
} else if (first_obj != obj) {
DCHECK_LT(first_obj, obj);
DCHECK_LT(reclaim_begin, reinterpret_cast<uint8_t*>(obj));
reclaim_begin = reinterpret_cast<uint8_t*>(obj);
break;
}
}
if (obj == first_obj) {
reclaim_begin = black_allocations_begin_;
}
}
}
reclaim_begin = AlignUp(reclaim_begin, gPageSize);
}
DCHECK_NE(reclaim_begin, nullptr);
DCHECK_ALIGNED_PARAM(reclaim_begin, gPageSize);
DCHECK_ALIGNED_PARAM(last_reclaimed_page_, gPageSize);
// Check if the 'class_after_obj_map_' map allows pages to be freed.
for (; class_after_obj_iter_ != class_after_obj_map_.rend(); class_after_obj_iter_++) {
mirror::Object* klass = class_after_obj_iter_->first.AsMirrorPtr();
mirror::Class* from_klass = static_cast<mirror::Class*>(GetFromSpaceAddr(klass));
// Check with class' end to ensure that, if required, the entire class survives.
uint8_t* klass_end = reinterpret_cast<uint8_t*>(klass) + from_klass->SizeOf<kVerifyNone>();
DCHECK_LE(klass_end, last_reclaimed_page_);
if (reinterpret_cast<uint8_t*>(klass_end) >= reclaim_begin) {
// Found a class which is in the reclaim range.
if (reinterpret_cast<uint8_t*>(class_after_obj_iter_->second.AsMirrorPtr()) < idx_addr) {
// Its lowest-address object is not compacted yet. Reclaim starting from
// the end of this class.
reclaim_begin = AlignUp(klass_end, gPageSize);
} else {
// Continue consuming pairs wherein the lowest address object has already
// been compacted.
continue;
}
}
// All the remaining class (and thereby corresponding object) addresses are
// lower than the reclaim range.
break;
}
bool all_mapped = mode == kFallbackMode;
ssize_t size = last_reclaimed_page_ - reclaim_begin;
if (size > kMinFromSpaceMadviseSize) {
// Map all the pages in the range.
if (mode == kCopyMode && cur_page_idx < end_idx_for_mapping) {
if (MapMovingSpacePages(cur_page_idx,
end_idx_for_mapping,
/*from_ioctl=*/false,
/*return_on_contention=*/true,
/*tolerate_enoent=*/false) == end_idx_for_mapping - cur_page_idx) {
all_mapped = true;
}
} else {
// This is for the black-allocation pages, so that the madvise is not missed.
all_mapped = true;
}
// If not all pages are mapped, then take it as a hint that mmap_lock is
// contended and hence don't madvise as that also needs the same lock.
if (all_mapped) {
// Retain a few pages for subsequent compactions.
const ssize_t gBufferPages = 4 * gPageSize;
DCHECK_LT(gBufferPages, kMinFromSpaceMadviseSize);
size -= gBufferPages;
uint8_t* addr = last_reclaimed_page_ - size;
CHECK_EQ(madvise(addr + from_space_slide_diff_, size, MADV_DONTNEED), 0)
<< "madvise of from-space failed: " << strerror(errno);
last_reclaimed_page_ = addr;
cur_reclaimable_page_ = addr;
}
}
last_reclaimable_page_ = std::min(reclaim_begin, last_reclaimable_page_);
last_checked_reclaim_page_idx_ = idx;
return all_mapped;
}
template <int kMode>
void MarkCompact::CompactMovingSpace(uint8_t* page) {
// For every page we have a starting object, which may have started in some
// preceding page, and an offset within that object from where we must start
// copying.
// Consult the live-words bitmap to copy all contiguously live words at a
// time. These words may constitute multiple objects. To avoid having to
// consult the mark-bitmap to find where the next live object starts, we
// use the object-size returned by VisitRefsForCompaction.
//
// We do the compaction in reverse direction so that the pages containing
// TLAB and latest allocations are processed first.
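//
// A sketch of the loops below: with N = moving_first_objs_count_ + black_page_count_
// to-space pages, idx walks from N-1 down to 0. First the black-allocation pages
// are slid, then the pages above the black-dense region are compacted, and finally
// the black-dense prefix is updated in place, with FreeFromSpacePages() called
// along the way to release from-space pages.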
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
size_t page_status_arr_len = moving_first_objs_count_ + black_page_count_;
size_t idx = page_status_arr_len;
size_t black_dense_end_idx = (black_dense_end_ - moving_space_begin_) / gPageSize;
uint8_t* to_space_end = moving_space_begin_ + page_status_arr_len * gPageSize;
uint8_t* pre_compact_page = black_allocations_begin_ + (black_page_count_ * gPageSize);
DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
// These variables are maintained by FreeFromSpacePages().
last_reclaimed_page_ = pre_compact_page;
last_reclaimable_page_ = last_reclaimed_page_;
cur_reclaimable_page_ = last_reclaimed_page_;
last_checked_reclaim_page_idx_ = idx;
class_after_obj_iter_ = class_after_obj_map_.rbegin();
// Allocated-black pages
mirror::Object* next_page_first_obj = nullptr;
while (idx > moving_first_objs_count_) {
idx--;
pre_compact_page -= gPageSize;
to_space_end -= gPageSize;
if (kMode == kFallbackMode) {
page = to_space_end;
}
mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[idx];
if (first_obj != nullptr) {
DoPageCompactionWithStateChange<kMode>(idx,
to_space_end,
page,
/*map_immediately=*/true,
[&]() REQUIRES_SHARED(Locks::mutator_lock_) {
SlideBlackPage(first_obj,
next_page_first_obj,
first_chunk_size,
pre_compact_page,
page,
kMode == kCopyMode);
});
// We are sliding here, so no point attempting to madvise for every
// page. Wait for enough pages to be done.
if (idx % DivideByPageSize(kMinFromSpaceMadviseSize) == 0) {
FreeFromSpacePages(idx, kMode, /*end_idx_for_mapping=*/0);
}
}
next_page_first_obj = first_obj;
}
DCHECK_EQ(pre_compact_page, black_allocations_begin_);
// Reserved page to be used if we can't find any reclaimable page for processing.
uint8_t* reserve_page = page;
size_t end_idx_for_mapping = idx;
while (idx > black_dense_end_idx) {
idx--;
to_space_end -= gPageSize;
if (kMode == kFallbackMode) {
page = to_space_end;
} else {
DCHECK_EQ(kMode, kCopyMode);
if (cur_reclaimable_page_ > last_reclaimable_page_) {
cur_reclaimable_page_ -= gPageSize;
page = cur_reclaimable_page_ + from_space_slide_diff_;
} else {
page = reserve_page;
}
}
mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
bool success = DoPageCompactionWithStateChange<kMode>(
idx,
to_space_end,
page,
/*map_immediately=*/page == reserve_page,
[&]() REQUIRES_SHARED(Locks::mutator_lock_) {
if (use_generational_ && to_space_end < mid_gen_end_) {
CompactPage</*kSetupForGenerational=*/true>(first_obj,
pre_compact_offset_moving_space_[idx],
page,
to_space_end,
kMode == kCopyMode);
} else {
CompactPage</*kSetupForGenerational=*/false>(first_obj,
pre_compact_offset_moving_space_[idx],
page,
to_space_end,
kMode == kCopyMode);
}
});
if (kMode == kCopyMode && (!success || page == reserve_page) && end_idx_for_mapping - idx > 1) {
// Map the pages at the following addresses now, as they can't be mapped together
// with the yet-to-be-compacted pages: their src-side pages won't be contiguous.
MapMovingSpacePages(idx + 1,
end_idx_for_mapping,
/*from_fault=*/false,
/*return_on_contention=*/true,
/*tolerate_enoent=*/false);
}
if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
end_idx_for_mapping = idx;
}
}
while (idx > 0) {
idx--;
to_space_end -= gPageSize;
mirror::Object* first_obj = first_objs_moving_space_[idx].AsMirrorPtr();
if (first_obj != nullptr) {
DoPageCompactionWithStateChange<kMode>(
idx,
to_space_end,
to_space_end + from_space_slide_diff_,
/*map_immediately=*/false,
[&]() REQUIRES_SHARED(Locks::mutator_lock_) {
if (use_generational_) {
UpdateNonMovingPage</*kSetupForGenerational=*/true>(
first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
} else {
UpdateNonMovingPage</*kSetupForGenerational=*/false>(
first_obj, to_space_end, from_space_slide_diff_, moving_space_bitmap_);
}
if (kMode == kFallbackMode) {
memcpy(to_space_end, to_space_end + from_space_slide_diff_, gPageSize);
}
});
} else {
// The page has no reachable object on it. Just declare it mapped.
// Mutators shouldn't step on this page, which is asserted in sigbus
// handler.
DCHECK_EQ(moving_pages_status_[idx].load(std::memory_order_relaxed),
static_cast<uint8_t>(PageState::kUnprocessed));
moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
std::memory_order_release);
}
if (FreeFromSpacePages(idx, kMode, end_idx_for_mapping)) {
end_idx_for_mapping = idx;
}
}
// map one last time to finish anything left.
if (kMode == kCopyMode && end_idx_for_mapping > 0) {
MapMovingSpacePages(idx,
end_idx_for_mapping,
/*from_fault=*/false,
/*return_on_contention=*/false,
/*tolerate_enoent=*/false);
}
DCHECK_EQ(to_space_end, bump_pointer_space_->Begin());
}
size_t MarkCompact::MapMovingSpacePages(size_t start_idx,
size_t arr_len,
bool from_fault,
bool return_on_contention,
bool tolerate_enoent) {
DCHECK_LT(start_idx, arr_len);
size_t arr_idx = start_idx;
bool wait_for_unmapped = false;
while (arr_idx < arr_len) {
size_t map_count = 0;
uint32_t cur_state = moving_pages_status_[arr_idx].load(std::memory_order_acquire);
// Find a contiguous range that can be mapped with a single ioctl.
for (uint32_t i = arr_idx, from_page = cur_state & ~kPageStateMask; i < arr_len;
i++, map_count++, from_page += gPageSize) {
uint32_t s = moving_pages_status_[i].load(std::memory_order_acquire);
uint32_t cur_from_page = s & ~kPageStateMask;
if (GetPageStateFromWord(s) != PageState::kProcessed || cur_from_page != from_page) {
break;
}
}
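// E.g., if pages arr_idx, arr_idx+1, and arr_idx+2 are all in kProcessed state and
// their stored from-space offsets are consecutive, the loop above yields
// map_count == 3 and the single ioctl below maps all three pages at once.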
if (map_count == 0) {
if (from_fault) {
bool mapped = GetPageStateFromWord(cur_state) == PageState::kProcessedAndMapped;
return mapped ? 1 : 0;
}
// Skip the pages that this thread cannot map.
for (; arr_idx < arr_len; arr_idx++) {
PageState s = GetMovingPageState(arr_idx);
if (s == PageState::kProcessed) {
break;
} else if (s != PageState::kProcessedAndMapped) {
wait_for_unmapped = true;
}
}
} else {
uint32_t from_space_offset = cur_state & ~kPageStateMask;
uint8_t* to_space_start = moving_space_begin_ + arr_idx * gPageSize;
uint8_t* from_space_start = from_space_begin_ + from_space_offset;
DCHECK_ALIGNED_PARAM(to_space_start, gPageSize);
DCHECK_ALIGNED_PARAM(from_space_start, gPageSize);
size_t mapped_len = CopyIoctl(to_space_start,
from_space_start,
map_count * gPageSize,
return_on_contention,
tolerate_enoent);
for (size_t l = 0; l < mapped_len; l += gPageSize, arr_idx++) {
// Store is sufficient as anyone storing is doing it with the same value.
moving_pages_status_[arr_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
std::memory_order_release);
}
if (from_fault) {
return DivideByPageSize(mapped_len);
}
// The COPY ioctl can also return a smaller length if a page was found to be
// already mapped, but that doesn't count as contention.
if (return_on_contention && DivideByPageSize(mapped_len) < map_count && errno != EEXIST) {
return arr_idx - start_idx;
}
}
}
if (wait_for_unmapped) {
for (size_t i = start_idx; i < arr_len; i++) {
PageState s = GetMovingPageState(i);
DCHECK_GT(s, PageState::kProcessed);
uint32_t backoff_count = 0;
while (s != PageState::kProcessedAndMapped) {
BackOff(backoff_count++);
s = GetMovingPageState(i);
}
}
}
return arr_len - start_idx;
}
template <bool kSetupForGenerational>
void MarkCompact::UpdateNonMovingPage(mirror::Object* first,
uint8_t* page,
ptrdiff_t from_space_diff,
accounting::ContinuousSpaceBitmap* bitmap) {
DCHECK_LT(reinterpret_cast<uint8_t*>(first), page + gPageSize);
accounting::CardTable* card_table = heap_->GetCardTable();
mirror::Object* curr_obj = first;
uint8_t* from_page = page + from_space_diff;
uint8_t* from_page_end = from_page + gPageSize;
uint8_t* scan_begin =
std::max(reinterpret_cast<uint8_t*>(first) + mirror::kObjectHeaderSize, page);
// For every object found in the page, visit the previous object. This ensures
// that we can visit without checking page-end boundary.
// Call VisitRefsForCompaction with from-space read-barrier as the klass object and
// super-class loads require it.
// TODO: Set kVisitNativeRoots to false once we implement concurrent
// compaction
auto obj_visitor = [&](mirror::Object* next_obj) {
if (curr_obj != nullptr) {
mirror::Object* from_obj =
reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
bool should_dirty_card;
if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ false, kSetupForGenerational> visitor(
this, from_obj, from_page, from_page_end, card_table, curr_obj);
MemberOffset begin_offset(page - reinterpret_cast<uint8_t*>(curr_obj));
// Native roots shouldn't be visited as they are done when this
// object's beginning was visited in the preceding page.
from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
visitor, begin_offset, MemberOffset(-1));
should_dirty_card = visitor.ShouldDirtyCard();
} else {
RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ false, kSetupForGenerational>
visitor(this, from_obj, from_page, from_page_end, card_table, curr_obj);
from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
visitor, MemberOffset(0), MemberOffset(-1));
should_dirty_card = visitor.ShouldDirtyCard();
}
if (kSetupForGenerational && should_dirty_card) {
card_table->MarkCard(curr_obj);
}
}
curr_obj = next_obj;
};
if (young_gen_) {
DCHECK(bitmap->Test(first));
// If the first-obj is covered by the same card which also covers the first
// word of the page, then it's important to set curr_obj to nullptr to avoid
// updating the references twice.
if (card_table->IsClean(first) ||
card_table->CardFromAddr(first) == card_table->CardFromAddr(scan_begin)) {
curr_obj = nullptr;
}
// We cannot acquire heap-bitmap-lock here as this function is called from
// SIGBUS handler. But it's safe as the bitmap passed to Scan function
// can't get modified until this GC cycle is finished.
FakeMutexLock mu(*Locks::heap_bitmap_lock_);
card_table->Scan</*kClearCard=*/false>(
bitmap, scan_begin, page + gPageSize, obj_visitor, accounting::CardTable::kCardAged2);
} else {
bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(scan_begin),
reinterpret_cast<uintptr_t>(page + gPageSize),
obj_visitor);
}
if (curr_obj != nullptr) {
bool should_dirty_card;
mirror::Object* from_obj =
reinterpret_cast<mirror::Object*>(reinterpret_cast<uint8_t*>(curr_obj) + from_space_diff);
MemberOffset end_offset(page + gPageSize - reinterpret_cast<uint8_t*>(curr_obj));
if (reinterpret_cast<uint8_t*>(curr_obj) < page) {
RefsUpdateVisitor</*kCheckBegin*/ true, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
this, from_obj, from_page, from_page_end, card_table, curr_obj);
from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false, /*kVisitNativeRoots*/ false>(
visitor, MemberOffset(page - reinterpret_cast<uint8_t*>(curr_obj)), end_offset);
should_dirty_card = visitor.ShouldDirtyCard();
} else {
RefsUpdateVisitor</*kCheckBegin*/ false, /*kCheckEnd*/ true, kSetupForGenerational> visitor(
this, from_obj, from_page, from_page_end, card_table, curr_obj);
from_obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
visitor, MemberOffset(0), end_offset);
should_dirty_card = visitor.ShouldDirtyCard();
}
if (kSetupForGenerational && should_dirty_card) {
card_table->MarkCard(curr_obj);
}
}
}
void MarkCompact::UpdateNonMovingSpace() {
TimingLogger::ScopedTiming t("(Paused)UpdateNonMovingSpace", GetTimings());
// Iterating in reverse ensures that the class pointer in objects which span
// across more than one page gets updated in the end. This is necessary for
// VisitRefsForCompaction() to work correctly.
// TODO: If and when we make non-moving space update concurrent, implement a
// mechanism to remember class pointers for such objects off-heap and pass it
// to VisitRefsForCompaction().
uint8_t* page = non_moving_space_->Begin() + non_moving_first_objs_count_ * gPageSize;
for (ssize_t i = non_moving_first_objs_count_ - 1; i >= 0; i--) {
mirror::Object* obj = first_objs_non_moving_space_[i].AsMirrorPtr();
page -= gPageSize;
// null means there are no objects on the page to update references.
if (obj != nullptr) {
if (use_generational_) {
UpdateNonMovingPage</*kSetupForGenerational=*/true>(
obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
} else {
UpdateNonMovingPage</*kSetupForGenerational=*/false>(
obj, page, /*from_space_diff=*/0, non_moving_space_bitmap_);
}
}
}
}
void MarkCompact::UpdateMovingSpaceBlackAllocations() {
// For sliding black pages, we need the first-object, which overlaps with the
// first byte of the page. Additionally, we compute the size of the first chunk of
// black objects. This will suffice for most black pages. Unlike compaction
// pages, here we don't need to pre-compute the offset within the first-obj from
// where sliding has to start: that can be calculated using the pre-compact
// address of the page. Therefore, to save space, we store the first chunk's
// size in the black_alloc_pages_first_chunk_size_ array.
// For the pages which may have holes after the first chunk, which could happen
// if a new TLAB starts in the middle of the page, we mark the objects in
// the mark-bitmap. So, if the first-chunk size is smaller than gPageSize,
// then we use the mark-bitmap for the remainder of the page.
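// As an illustration: if a black page begins with a 3 KiB chunk of objects from
// one TLAB and a new TLAB starts later on the same page, we record the page's
// first-object and a first-chunk size of 3 KiB, and set mark-bits on the later
// TLAB's objects so that SlideBlackPage() can locate them via the bitmap.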
uint8_t* const begin = bump_pointer_space_->Begin();
uint8_t* black_allocs = black_allocations_begin_;
DCHECK_LE(begin, black_allocs);
size_t consumed_blocks_count = 0;
size_t first_block_size;
// Needed only for a debug check at the end of the function. Hopefully the
// compiler will eliminate it otherwise.
size_t num_blocks = 0;
// Get the list of all blocks allocated in the bump-pointer space.
std::vector<size_t>* block_sizes = bump_pointer_space_->GetBlockSizes(thread_running_gc_,
&first_block_size);
DCHECK_LE(first_block_size, (size_t)(black_allocs - begin));
if (block_sizes != nullptr) {
size_t black_page_idx = moving_first_objs_count_;
uint8_t* block_end = begin + first_block_size;
uint32_t remaining_chunk_size = 0;
uint32_t first_chunk_size = 0;
mirror::Object* first_obj = nullptr;
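// In effect, remaining_chunk_size tracks how many bytes of the current black
// page have been accounted for so far (black objects as well as unallocated
// holes), while first_chunk_size accumulates the first contiguous chunk of
// black-allocated bytes on that page, starting at first_obj.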
num_blocks = block_sizes->size();
for (size_t block_size : *block_sizes) {
block_end += block_size;
// Skip the blocks that are prior to the black allocations. These will be
// merged with the main-block later.
if (black_allocs >= block_end) {
consumed_blocks_count++;
continue;
}
mirror::Object* obj = reinterpret_cast<mirror::Object*>(black_allocs);
bool set_mark_bit = remaining_chunk_size > 0;
// We don't know how many objects are allocated in the current block. When we hit
// a null, assume it's the end. This works as every block is expected to
// have objects allocated linearly using bump-pointer.
// BumpPointerSpace::Walk() also works similarly.
while (black_allocs < block_end
&& obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
// Try to keep instructions which access the class instance together to
// avoid reloading the pointer from the object.
size_t obj_size = obj->SizeOf();
bytes_scanned_ += obj_size;
obj_size = RoundUp(obj_size, kAlignment);
UpdateClassAfterObjectMap(obj);
if (first_obj == nullptr) {
first_obj = obj;
}
// We only need the mark-bitmap in the pages wherein a new TLAB starts in
// the middle of the page.
if (set_mark_bit) {
moving_space_bitmap_->Set(obj);
}
// Handle objects which cross the page boundary, including objects larger
// than the page size.
if (remaining_chunk_size + obj_size >= gPageSize) {
set_mark_bit = false;
first_chunk_size += gPageSize - remaining_chunk_size;
remaining_chunk_size += obj_size;
// We should not store first-object and remaining_chunk_size if there were
// unused bytes before this TLAB, in which case we must have already
// stored the values (below).
if (black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
first_objs_moving_space_[black_page_idx].Assign(first_obj);
}
black_page_idx++;
remaining_chunk_size -= gPageSize;
// Consume an object larger than page size.
while (remaining_chunk_size >= gPageSize) {
black_alloc_pages_first_chunk_size_[black_page_idx] = gPageSize;
first_objs_moving_space_[black_page_idx].Assign(obj);
black_page_idx++;
remaining_chunk_size -= gPageSize;
}
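// Whatever spilled past the last page boundary becomes the first chunk of the
// new current page, with obj as its first-object (nullptr if nothing spilled over).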
first_obj = remaining_chunk_size > 0 ? obj : nullptr;
first_chunk_size = remaining_chunk_size;
} else {
DCHECK_LE(first_chunk_size, remaining_chunk_size);
first_chunk_size += obj_size;
remaining_chunk_size += obj_size;
}
black_allocs += obj_size;
obj = reinterpret_cast<mirror::Object*>(black_allocs);
}
DCHECK_LE(black_allocs, block_end);
DCHECK_LT(remaining_chunk_size, gPageSize);
// Consume the unallocated portion of the block.
if (black_allocs < block_end) {
// The first-chunk of the current page ends here. Store it.
if (first_chunk_size > 0 && black_alloc_pages_first_chunk_size_[black_page_idx] == 0) {
black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
first_objs_moving_space_[black_page_idx].Assign(first_obj);
}
first_chunk_size = 0;
first_obj = nullptr;
size_t page_remaining = gPageSize - remaining_chunk_size;
size_t block_remaining = block_end - black_allocs;
if (page_remaining <= block_remaining) {
block_remaining -= page_remaining;
// Advance past the current page and the subsequent empty pages in the block.
black_page_idx += 1 + DivideByPageSize(block_remaining);
remaining_chunk_size = ModuloPageSize(block_remaining);
} else {
remaining_chunk_size += block_remaining;
}
black_allocs = block_end;
}
}
if (black_page_idx < DivideByPageSize(bump_pointer_space_->Size())) {
// Store the leftover first-chunk, if any, and update page index.
if (black_alloc_pages_first_chunk_size_[black_page_idx] > 0) {
black_page_idx++;
} else if (first_chunk_size > 0) {
black_alloc_pages_first_chunk_size_[black_page_idx] = first_chunk_size;
first_objs_moving_space_[black_page_idx].Assign(first_obj);
black_page_idx++;
}
}
black_page_count_ = black_page_idx - moving_first_objs_count_;
delete block_sizes;
}
// Update bump-pointer space by consuming all the pre-black blocks into the
// main one.
bump_pointer_space_->SetBlockSizes(thread_running_gc_,
post_compact_end_ - begin,
consumed_blocks_count);
if (kIsDebugBuild) {
size_t moving_space_size = bump_pointer_space_->Size();
size_t los_size = 0;
if (heap_->GetLargeObjectsSpace()) {
los_size = heap_->GetLargeObjectsSpace()->GetBytesAllocated();
}
// The moving-space size is already updated to post-compact size in SetBlockSizes above.
// Also, bytes-allocated has already been adjusted with the large-object space's freed-bytes
// in Sweep(), but not with moving-space freed-bytes.
CHECK_GE(heap_->GetBytesAllocated() - black_objs_slide_diff_, moving_space_size + los_size)
<< " moving-space size:" << moving_space_size
<< " moving-space bytes-freed:" << black_objs_slide_diff_
<< " large-object-space size:" << los_size
<< " large-object-space bytes-freed:" << GetCurrentIteration()->GetFreedLargeObjectBytes()
<< " num-tlabs-merged:" << consumed_blocks_count
<< " main-block-size:" << (post_compact_end_ - begin)
<< " total-tlabs-moving-space:" << num_blocks;
}
}
void MarkCompact::UpdateNonMovingSpaceBlackAllocations() {
accounting::ObjectStack* stack = heap_->GetAllocationStack();
const StackReference<mirror::Object>* limit = stack->End();
uint8_t* const space_begin = non_moving_space_->Begin();
size_t num_pages = DivideByPageSize(non_moving_space_->Capacity());
for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
mirror::Object* obj = it->AsMirrorPtr();
if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
non_moving_space_bitmap_->Set(obj);
if (!use_generational_) {
// Clear so that we don't try to set the bit again in the next GC-cycle.
it->Clear();
}
size_t idx = DivideByPageSize(reinterpret_cast<uint8_t*>(obj) - space_begin);
uint8_t* page_begin = AlignDown(reinterpret_cast<uint8_t*>(obj), gPageSize);
mirror::Object* first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
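// Record obj as the page's first-object if none is recorded yet, or if obj
// starts earlier on this page and the recorded one does not extend from a
// previous page.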
if (first_obj == nullptr
|| (obj < first_obj && reinterpret_cast<uint8_t*>(first_obj) > page_begin)) {
first_objs_non_moving_space_[idx].Assign(obj);
}
if (++idx == num_pages) {
continue;
}
mirror::Object* next_page_first_obj = first_objs_non_moving_space_[idx].AsMirrorPtr();
uint8_t* next_page_begin = page_begin + gPageSize;
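// If obj spans beyond this page, also record it as the first-object of every
// page it extends into, unless the next page already has an object recorded
// that starts at or before that page's beginning.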
if (next_page_first_obj == nullptr
|| reinterpret_cast<uint8_t*>(next_page_first_obj) > next_page_begin) {
size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + obj_size;
while (next_page_begin < obj_end) {
first_objs_non_moving_space_[idx++].Assign(obj);
next_page_begin += gPageSize;
}
}
// Update first_objs count in case we went past non_moving_first_objs_count_.
non_moving_first_objs_count_ = std::max(non_moving_first_objs_count_, idx);
}
}
}
class MarkCompact::ImmuneSpaceUpdateObjVisitor {
public:
explicit ImmuneSpaceUpdateObjVisitor(MarkCompact* collector) : collector_(collector) {}
void operator()(mirror::Object* obj) const ALWAYS_INLINE REQUIRES(Locks::mutator_lock_) {
RefsUpdateVisitor</*kCheckBegin*/false, /*kCheckEnd*/false> visitor(collector_,
obj,
/*begin_*/nullptr,
/*end_*/nullptr);
obj->VisitRefsForCompaction</*kFetchObjSize*/ false>(
visitor, MemberOffset(0), MemberOffset(-1));
}
static void Callback(mirror::Object* obj, void* arg) REQUIRES(Locks::mutator_lock_) {
reinterpret_cast<ImmuneSpaceUpdateObjVisitor*>(arg)->operator()(obj);
}
private:
MarkCompact* const collector_;
};
class MarkCompact::ClassLoaderRootsUpdater : public ClassLoaderVisitor {
public:
explicit ClassLoaderRootsUpdater(MarkCompact* collector)
: collector_(collector),
moving_space_begin_(collector->black_dense_end_),
moving_space_end_(collector->moving_space_end_) {}
void Visit(ObjPtr<mirror::ClassLoader> class_loader) override
REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) {
ClassTable* const class_table = class_loader->GetClassTable();
if (class_table != nullptr) {
// Classes are updated concurrently.
class_table->VisitRoots(*this, /*skip_classes=*/true);
}
}
void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
if (!root->IsNull()) {
VisitRoot(root);
}
}
void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
collector_->UpdateRoot(
root, moving_space_begin_, moving_space_end_, RootInfo(RootType::kRootVMInternal));
}
private:
MarkCompact* collector_;
uint8_t* const moving_space_begin_;
uint8_t* const moving_space_end_;
};
class MarkCompact::LinearAllocPageUpdater {
public:
explicit LinearAllocPageUpdater(MarkCompact* collector)
: collector_(collector),
moving_space_begin_(collector->black_dense_end_),
moving_space_end_(collector->moving_space_end_),
last_page_touched_(false) {}
// Update a page in multi-object arena.
void MultiObjectArena(uint8_t* page_begin, uint8_t* first_obj)
REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(first_obj != nullptr);
DCHECK_ALIGNED_PARAM(page_begin, gPageSize);
uint8_t* page_end = page_begin + gPageSize;
uint32_t obj_size;
for (uint8_t* byte = first_obj; byte < page_end;) {
TrackingHeader* header = reinterpret_cast<TrackingHeader*>(byte);
obj_size = header->GetSize();
if (UNLIKELY(obj_size == 0)) {
// No more objects in this page to visit.
last_page_touched_ = byte >= page_begin;
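// (byte < page_begin means the terminating header lies in a preceding page,
// so this page itself was never accessed.)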
return;
}
uint8_t* obj = byte + sizeof(TrackingHeader);
uint8_t* obj_end = byte + obj_size;
if (header->Is16Aligned()) {
obj = AlignUp(obj, 16);
}
uint8_t* begin_boundary = std::max(obj, page_begin);
uint8_t* end_boundary = std::min(obj_end, page_end);
if (begin_boundary < end_boundary) {
VisitObject(header->GetKind(), obj, begin_boundary, end_boundary);
}
if (ArenaAllocator::IsRunningOnMemoryTool()) {
obj_size += ArenaAllocator::kMemoryToolRedZoneBytes;
}
byte += RoundUp(obj_size, LinearAlloc::kAlignment);
}
last_page_touched_ = true;
}
// This version is only used for cases where the entire page is filled with
// GC-roots. For example, class-table and intern-table.
void SingleObjectArena(uint8_t* page_begin, size_t page_size)
REQUIRES_SHARED(Locks::mutator_lock_) {
static_assert(sizeof(uint32_t) == sizeof(GcRoot<mirror::Object>));
DCHECK_ALIGNED(page_begin, kAlignment);
// Least significant bits are used by class-table.
static constexpr uint32_t kMask = kObjectAlignment - 1;
size_t num_roots = page_size / sizeof(GcRoot<mirror::Object>);
uint32_t* root_ptr = reinterpret_cast<uint32_t*>(page_begin);
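// Each non-zero word holds a compressed GC-root with tag bits (used by the
// class-table) in its least-significant bits: strip the tag, update the root
// in place, and then restore the tag.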
for (size_t i = 0; i < num_roots; root_ptr++, i++) {
uint32_t word = *root_ptr;
if (word != 0) {
uint32_t lsbs = word & kMask;
word &= ~kMask;
VisitRootIfNonNull(reinterpret_cast<mirror::CompressedReference<mirror::Object>*>(&word));
*root_ptr = word | lsbs;
last_page_touched_ = true;
}
}
}
bool WasLastPageTouched() const { return last_page_touched_; }
void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
if (!root->IsNull()) {
VisitRoot(root);
}
}
void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
mirror::Object* old_ref = root->AsMirrorPtr();
DCHECK_NE(old_ref, nullptr);
if (MarkCompact::HasAddress(old_ref, moving_space_begin_, moving_space_end_)) {
mirror::Object* new_ref = old_ref;
if (reinterpret_cast<uint8_t*>(old_ref) >= collector_->black_allocations_begin_) {
new_ref = collector_->PostCompactBlackObjAddr(old_ref);
} else if (collector_->live_words_bitmap_->Test(old_ref)) {
DCHECK(collector_->moving_space_bitmap_->Test(old_ref))
<< "ref:" << old_ref << " root:" << root;
new_ref = collector_->PostCompactOldObjAddr(old_ref);
}
if (old_ref != new_ref) {
root->Assign(new_ref);
}
}
}
private:
void VisitObject(LinearAllocKind kind,
void* obj,
uint8_t* start_boundary,
uint8_t* end_boundary) const ALWAYS_INLINE
REQUIRES_SHARED(Locks::mutator_lock_) {
switch (kind) {
case LinearAllocKind::kNoGCRoots:
break;
case LinearAllocKind::kGCRootArray:
{
GcRoot<mirror::Object>* root = reinterpret_cast<GcRoot<mirror::Object>*>(start_boundary);
GcRoot<mirror::Object>* last = reinterpret_cast<GcRoot<mirror::Object>*>(end_boundary);
for (; root < last; root++) {
VisitRootIfNonNull(root->AddressWithoutBarrier());
}
}
break;
case LinearAllocKind::kArtMethodArray:
{
LengthPrefixedArray<ArtMethod>* array = static_cast<LengthPrefixedArray<ArtMethod>*>(obj);
// Old methods are clobbered in debug builds. Check size to confirm if the array
// has any GC roots to visit. See ClassLinker::LinkMethodsHelper::ClobberOldMethods()
if (array->size() > 0) {
if (collector_->pointer_size_ == PointerSize::k64) {
ArtMethod::VisitArrayRoots<PointerSize::k64>(
*this, start_boundary, end_boundary, array);
} else {
DCHECK_EQ(collector_->pointer_size_, PointerSize::k32);
ArtMethod::VisitArrayRoots<PointerSize::k32>(
*this, start_boundary, end_boundary, array);
}
}
}
break;
case LinearAllocKind::kArtMethod:
ArtMethod::VisitRoots(*this, start_boundary, end_boundary, static_cast<ArtMethod*>(obj));
break;
case LinearAllocKind::kArtFieldArray:
ArtField::VisitArrayRoots(*this,
start_boundary,
end_boundary,
static_cast<LengthPrefixedArray<ArtField>*>(obj));
break;
case LinearAllocKind::kDexCacheArray:
{
mirror::DexCachePair<mirror::Object>* first =
reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(start_boundary);
mirror::DexCachePair<mirror::Object>* last =
reinterpret_cast<mirror::DexCachePair<mirror::Object>*>(end_boundary);
mirror::DexCache::VisitDexCachePairRoots(*this, first, last);
}
}
}
MarkCompact* const collector_;
// Cache to speed up checking if GC-root is in moving space or not.
uint8_t* const moving_space_begin_;
uint8_t* const moving_space_end_;
// Whether the last page was touched or not.
bool last_page_touched_ = false;
};
void MarkCompact::UpdateClassTableClasses(Runtime* runtime, bool immune_class_table_only) {
// If the process is debuggable then redefinition is allowed, which may mean
// pre-zygote-fork class-tables may have pointers to classes in the moving-space.
// So visit classes from class-sets that are not in linear-alloc arena-pool.
if (UNLIKELY(runtime->IsJavaDebuggableAtInit())) {
ClassLinker* linker = runtime->GetClassLinker();
ClassLoaderRootsUpdater updater(this);
GcVisitedArenaPool* pool = static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
auto cond = [this, pool, immune_class_table_only](ClassTable::ClassSet& set) -> bool {
if (!set.empty()) {
return immune_class_table_only ?
immune_spaces_.ContainsObject(reinterpret_cast<mirror::Object*>(&*set.begin())) :
!pool->Contains(reinterpret_cast<void*>(&*set.begin()));
}
return false;
};
linker->VisitClassTables([cond, &updater](ClassTable* table)
REQUIRES_SHARED(Locks::mutator_lock_) {
table->VisitClassesIfConditionMet(cond, updater);
});
ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
linker->GetBootClassTable()->VisitClassesIfConditionMet(cond, updater);
}
}
void MarkCompact::CompactionPause() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
Runtime* runtime = Runtime::Current();
if (kIsDebugBuild) {
DCHECK_EQ(thread_running_gc_, Thread::Current());
// TODO(Simulator): Test that this should not operate on the simulated stack when the simulator
// supports mark compact.
stack_low_addr_ = thread_running_gc_->GetStackEnd<kNativeStackType>();
stack_high_addr_ = reinterpret_cast<char*>(stack_low_addr_)
+ thread_running_gc_->GetUsableStackSize<kNativeStackType>();
}
{
TimingLogger::ScopedTiming t2("(Paused)UpdateCompactionDataStructures", GetTimings());
ReaderMutexLock rmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
// Refresh data-structures to catch up on allocations that may have
// happened since marking-phase pause.
// There could be several TLABs that got allocated since marking pause. We
// don't want to compact them and instead update the TLAB info in TLS and
// let mutators continue to use the TLABs.
// We need to set all the bits in live-words bitmap corresponding to allocated
// objects. Also, we need to find the objects that are overlapping with
// page-begin boundaries. Unlike objects allocated before
// black_allocations_begin_, which can be identified via mark-bitmap, we can get
// this info only via walking the space past black_allocations_begin_, which
// involves fetching object size.
// TODO: We can reduce the time spent on this in a pause by performing one
// round of this concurrently prior to the pause.
UpdateMovingSpaceBlackAllocations();
// Iterate over the allocation_stack_, for every object in the non-moving
// space:
// 1. Mark the object in live bitmap
// 2. Erase the object from allocation stack
// 3. In the corresponding page, if the first-object vector needs updating
// then do so.
UpdateNonMovingSpaceBlackAllocations();
// This store is visible to mutator (or uffd worker threads) as the mutator
// lock's unlock guarantees that.
compacting_ = true;
// Start updating roots and system weaks now.
heap_->GetReferenceProcessor()->UpdateRoots(this);
}
{
// TODO: Immune space updating has to happen either before or after
// remapping pre-compact pages to from-space. And depending on when it's
// done, we have to invoke VisitRefsForCompaction() with or without
// read-barrier.
TimingLogger::ScopedTiming t2("(Paused)UpdateImmuneSpaces", GetTimings());
accounting::CardTable* const card_table = heap_->GetCardTable();
for (auto& space : immune_spaces_.GetSpaces()) {
DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
accounting::ContinuousSpaceBitmap* live_bitmap = space->GetLiveBitmap();
accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
// Having zygote-space indicates that the first zygote fork has taken
// place and that the classes/dex-caches in immune-spaces may have allocations
// (ArtMethod/ArtField arrays, dex-cache array, etc.) in the
// non-userfaultfd visited private-anonymous mappings. Visit them here.
ImmuneSpaceUpdateObjVisitor visitor(this);
if (table != nullptr) {
table->ProcessCards();
table->VisitObjects(ImmuneSpaceUpdateObjVisitor::Callback, &visitor);
} else {
WriterMutexLock wmu(thread_running_gc_, *Locks::heap_bitmap_lock_);
card_table->Scan<false>(
live_bitmap,
space->Begin(),
space->Limit(),
visitor,
accounting::CardTable::kCardDirty - 1);
}
}
}
{
TimingLogger::ScopedTiming t2("(Paused)UpdateRoots", GetTimings());
runtime->VisitConcurrentRoots(this, kVisitRootFlagAllRoots);
runtime->VisitNonThreadRoots(this);
{
ClassLinker* linker = runtime->GetClassLinker();
ClassLoaderRootsUpdater updater(this);
ReaderMutexLock rmu(thread_running_gc_, *Locks::classlinker_classes_lock_);
linker->VisitClassLoaders(&updater);
linker->GetBootClassTable()->VisitRoots(updater, /*skip_classes=*/true);
}
SweepSystemWeaks(thread_running_gc_, runtime, /*paused=*/true);
bool has_zygote_space = heap_->HasZygoteSpace();
GcVisitedArenaPool* arena_pool =
static_cast<GcVisitedArenaPool*>(runtime->GetLinearAllocArenaPool());
// Update immune/pre-zygote class-tables in case class redefinition took
// place. Pre-zygote class-tables that are not in immune spaces are updated
// below if we are in fallback-mode or if there is no zygote space. So in
// that case, only visit class-tables that are in immune-spaces.
UpdateClassTableClasses(runtime, uffd_ == kFallbackMode || !has_zygote_space);
// Acquire arena-pool's lock, which should be released after the pool is
// userfaultfd registered. This is to ensure that no new arenas are
// allocated and used in between. Since they will not be captured in
// linear_alloc_arenas_ below, we will miss updating their pages. The same
// reason also applies to new allocations within the existing arena which
// may change last_byte.
// Since we are in an STW pause, this shouldn't happen anyway, but holding
// the lock confirms it.
// TODO (b/305779657): Replace with ExclusiveTryLock() and assert that it
// doesn't fail once it is available for ReaderWriterMutex.
WriterMutexLock pool_wmu(thread_running_gc_, arena_pool->GetLock());
// TODO: Find out why it's not sufficient to visit native roots of immune
// spaces, and why all the pre-zygote fork arenas have to be linearly updated.
// Is it possible that some native root starts getting pointed to by some object
// in moving space after fork? Or are we missing a write-barrier somewhere
// when a native root is updated?
auto arena_visitor = [this](uint8_t* page_begin, uint8_t* first_obj, size_t page_size)
REQUIRES_SHARED(Locks::mutator_lock_) {
LinearAllocPageUpdater updater(this);
if (first_obj != nullptr) {
updater.MultiObjectArena(page_begin, first_obj);
} else {
updater.SingleObjectArena(page_begin, page_size);
}
};
if (uffd_ == kFallbackMode || (!has_zygote_space && runtime->IsZygote())) {
// Besides fallback-mode, visit linear-alloc space in the pause for zygote
// processes prior to first fork (that's when zygote space gets created).
if (kIsDebugBuild && IsValidFd(uffd_)) {
// All arenas allocated so far are expected to be pre-zygote fork.
arena_pool->ForEachAllocatedArena(
[](const TrackedArena& arena)
REQUIRES_SHARED(Locks::mutator_lock_) { CHECK(arena.IsPreZygoteForkArena()); });
}
arena_pool->VisitRoots(arena_visitor);
} else {
// Inform the arena-pool that compaction is going on. So the TrackedArena
// objects corresponding to the arenas that are freed shouldn't be deleted
// immediately. We will do that in FinishPhase(). This is to avoid ABA
// problem.
arena_pool->DeferArenaFreeing();
arena_pool->ForEachAllocatedArena(
[this, arena_visitor, has_zygote_space](const TrackedArena& arena)
REQUIRES_SHARED(Locks::mutator_lock_) {
// The pre-zygote fork arenas are not visited concurrently in the
// zygote children processes. The native roots of the dirty objects
// are visited during immune space visit below.
if (!arena.IsPreZygoteForkArena()) {
uint8_t* last_byte = arena.GetLastUsedByte();
auto ret = linear_alloc_arenas_.insert({&arena, last_byte});
CHECK(ret.second);
} else if (!arena.IsSingleObjectArena() || !has_zygote_space) {
// Pre-zygote class-table and intern-table don't need to be updated.
// TODO: Explore the possibility of using /proc/self/pagemap to
// fetch which pages in these arenas are private-dirty and then only
// visit those pages. To optimize it further, we can keep all
// pre-zygote arenas in a single memory range so that just one read
// from pagemap is sufficient.
arena.VisitRoots(arena_visitor);
}
});
}
// Release order wrt mutator threads' SIGBUS handler load.
sigbus_in_progress_count_[0].store(0, std::memory_order_relaxed);
sigbus_in_progress_count_[1].store(0, std::memory_order_release);
app_slow_path_start_time_ = MilliTime();
KernelPreparation();
}
UpdateNonMovingSpace();
// Fallback mode.
if (uffd_ == kFallbackMode) {
CompactMovingSpace<kFallbackMode>(nullptr);
int32_t freed_bytes = black_objs_slide_diff_;
bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
} else {
DCHECK_EQ(compaction_buffer_counter_.load(std::memory_order_relaxed), 1);
}
stack_low_addr_ = nullptr;
}
void MarkCompact::KernelPrepareRangeForUffd(uint8_t* to_addr, uint8_t* from_addr, size_t map_size) {
int mremap_flags = MREMAP_MAYMOVE | MREMAP_FIXED;
if (gHaveMremapDontunmap) {
mremap_flags |= MREMAP_DONTUNMAP;
}
void* ret = mremap(to_addr, map_size, map_size, mremap_flags, from_addr);
CHECK_EQ(ret, static_cast<void*>(from_addr))
<< "mremap to move pages failed: " << strerror(errno)
<< ". space-addr=" << reinterpret_cast<void*>(to_addr) << " size=" << PrettySize(map_size);
if (!gHaveMremapDontunmap) {
// Without MREMAP_DONTUNMAP the source mapping is unmapped by mremap. So mmap
// the moving space again.
// Use MAP_FIXED_NOREPLACE so that if someone else reserves the 'to_addr'
// mapping in the meantime, which can happen when MREMAP_DONTUNMAP isn't
// available, we fail here instead of unmapping someone else's mapping and
// causing crashes elsewhere.
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
ret = mmap(to_addr, map_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
CHECK_EQ(ret, static_cast<void*>(to_addr))
<< "mmap for moving space failed: " << strerror(errno);
}
}
void MarkCompact::KernelPreparation() {
TimingLogger::ScopedTiming t("(Paused)KernelPreparation", GetTimings());
uint8_t* moving_space_begin = bump_pointer_space_->Begin();
size_t moving_space_size = bump_pointer_space_->Capacity();
size_t moving_space_register_sz = (moving_first_objs_count_ + black_page_count_) * gPageSize;
DCHECK_LE(moving_space_register_sz, moving_space_size);
KernelPrepareRangeForUffd(moving_space_begin, from_space_begin_, moving_space_size);
if (IsValidFd(uffd_)) {
if (moving_space_register_sz > 0) {
// mremap clears 'anon_vma' field of anonymous mappings. If we
// uffd-register only the used portion of the space, then the vma gets
// split (between used and unused portions) and as soon as pages are
// mapped to the vmas, they get different `anon_vma` assigned, which
// ensures that the two vmas cannot merge after we uffd-unregister the
// used portion. OTOH, registering the entire space avoids the split, but
// unnecessarily causes userfaults on allocations.
// By faulting-in a page we force the kernel to allocate 'anon_vma' *before*
// the vma-split in uffd-register. This ensures that when we unregister
// the used portion after compaction, the two split vmas merge. This is
// necessary for the mremap of the next GC cycle to not fail due to having
// more than one vma in the source range.
//
// Fault in an address aligned to the PMD size so that, in case THP is enabled,
// we don't mistakenly fault in a page in the beginning portion that will be
// registered with uffd. If the alignment takes us beyond the space, then
// fault the first page and madvise it.
size_t pmd_size = Heap::GetPMDSize();
uint8_t* fault_in_addr = AlignUp(moving_space_begin + moving_space_register_sz, pmd_size);
if (bump_pointer_space_->Contains(reinterpret_cast<mirror::Object*>(fault_in_addr))) {
*const_cast<volatile uint8_t*>(fault_in_addr) = 0;
} else {
DCHECK_ALIGNED_PARAM(moving_space_begin, gPageSize);
*const_cast<volatile uint8_t*>(moving_space_begin) = 0;
madvise(moving_space_begin, pmd_size, MADV_DONTNEED);
}
// Register the moving space with userfaultfd.
RegisterUffd(moving_space_begin, moving_space_register_sz);
// madvise ensures that if any page gets mapped (only possible if some
// thread is reading the page(s) without trying to make sense of it, as we hold
// the mutator-lock exclusively) between mremap and uffd-registration, then
// it gets zapped so that the map is empty and ready for userfaults. If
// we could mremap after uffd-registration (like in case of linear-alloc
// space below) then we wouldn't need it. But since we don't register the
// entire space, we can't do that.
madvise(moving_space_begin, moving_space_register_sz, MADV_DONTNEED);
}
// Prepare linear-alloc for concurrent compaction.
for (auto& data : linear_alloc_spaces_data_) {
DCHECK_EQ(static_cast<ssize_t>(data.shadow_.Size()), data.end_ - data.begin_);
// There could be threads running in suspended mode when the compaction
// pause is being executed. In order to make the userfaultfd setup atomic,
// the registration has to be done *before* moving the pages to shadow map.
RegisterUffd(data.begin_, data.shadow_.Size());
KernelPrepareRangeForUffd(data.begin_, data.shadow_.Begin(), data.shadow_.Size());
}
}
}
bool MarkCompact::SigbusHandler(siginfo_t* info) {
class ScopedInProgressCount {
public:
explicit ScopedInProgressCount(MarkCompact* collector) : collector_(collector) {
// Increment the count only if compaction is not done yet.
for (idx_ = 0; idx_ < 2; idx_++) {
SigbusCounterType prev =
collector_->sigbus_in_progress_count_[idx_].load(std::memory_order_relaxed);
while ((prev & kSigbusCounterCompactionDoneMask) == 0) {
if (collector_->sigbus_in_progress_count_[idx_].compare_exchange_strong(
prev, prev + 1, std::memory_order_acquire)) {
DCHECK_LT(prev, kSigbusCounterCompactionDoneMask - 1);
return;
}
}
}
}
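// idx_ == 0: compaction is still in progress.
// idx_ == 1: compaction is done, but the spaces may not be uffd-unregistered
// yet, so ENOENT from the ioctls must be tolerated.
// idx_ == 2: both counters have the compaction-done bit set, i.e. compaction
// and space unregistration are both finished.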
bool TolerateEnoent() const { return idx_ == 1; }
bool IsCompactionDone() const { return idx_ == 2; }
~ScopedInProgressCount() {
if (idx_ < 2) {
collector_->sigbus_in_progress_count_[idx_].fetch_sub(1, std::memory_order_release);
}
}
private:
MarkCompact* const collector_;
uint8_t idx_;
};
if (info->si_code != BUS_ADRERR) {
// Userfaultfd raises SIGBUS with BUS_ADRERR. All other causes can't be
// handled here.
return false;
}
ScopedInProgressCount spc(this);
uint8_t* fault_page = AlignDown(reinterpret_cast<uint8_t*>(info->si_addr), gPageSize);
if (!spc.IsCompactionDone()) {
if (HasAddress(reinterpret_cast<mirror::Object*>(fault_page))) {
Thread* self = Thread::Current();
Locks::mutator_lock_->AssertSharedHeld(self);
size_t nr_moving_space_used_pages = moving_first_objs_count_ + black_page_count_;
ConcurrentlyProcessMovingPage(fault_page,
self->GetThreadLocalGcBuffer(),
nr_moving_space_used_pages,
spc.TolerateEnoent());
return true;
} else {
// Find the linear-alloc space containing fault-addr
for (auto& data : linear_alloc_spaces_data_) {
if (data.begin_ <= fault_page && data.end_ > fault_page) {
ConcurrentlyProcessLinearAllocPage(fault_page, spc.TolerateEnoent());
return true;
}
}
// Fault address doesn't belong to either moving-space or linear-alloc.
return false;
}
} else {
// We may spuriously get a SIGBUS fault, which was initiated before
// compaction finished, but ends up here. In that case, if the fault
// address is valid then consider it handled.
return HasAddress(reinterpret_cast<mirror::Object*>(fault_page)) ||
linear_alloc_spaces_data_.end() !=
std::find_if(linear_alloc_spaces_data_.begin(),
linear_alloc_spaces_data_.end(),
[fault_page](const LinearAllocSpaceData& data) {
return data.begin_ <= fault_page && data.end_ > fault_page;
});
}
}
void MarkCompact::ConcurrentlyProcessMovingPage(uint8_t* fault_page,
uint8_t* buf,
size_t nr_moving_space_used_pages,
bool tolerate_enoent) {
Thread* self = Thread::Current();
uint8_t* unused_space_begin = moving_space_begin_ + nr_moving_space_used_pages * gPageSize;
DCHECK(IsAlignedParam(unused_space_begin, gPageSize));
if (fault_page >= unused_space_begin) {
// There is a race which allows more than one thread to install a
// zero-page. But we can tolerate that. So absorb the EEXIST returned by
// the ioctl and move on.
ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
return;
}
size_t page_idx = DivideByPageSize(fault_page - moving_space_begin_);
DCHECK_LT(page_idx, moving_first_objs_count_ + black_page_count_);
mirror::Object* first_obj = first_objs_moving_space_[page_idx].AsMirrorPtr();
if (first_obj == nullptr) {
DCHECK_GT(fault_page, post_compact_end_);
// Install zero-pages in the entire remaining TLAB to avoid multiple ioctl invocations.
uint8_t* end = AlignDown(self->GetTlabEnd(), gPageSize);
if (fault_page < self->GetTlabStart() || fault_page >= end) {
end = fault_page + gPageSize;
}
size_t end_idx = page_idx + DivideByPageSize(end - fault_page);
size_t length = 0;
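// Count how many of the following pages are still unprocessed; zero-pages can
// then be installed for the whole run with a single ioctl.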
for (size_t idx = page_idx; idx < end_idx; idx++, length += gPageSize) {
uint32_t cur_state = moving_pages_status_[idx].load(std::memory_order_acquire);
if (cur_state != static_cast<uint8_t>(PageState::kUnprocessed)) {
DCHECK_EQ(cur_state, static_cast<uint8_t>(PageState::kProcessedAndMapped));
break;
}
}
if (length > 0) {
length = ZeropageIoctl(fault_page, length, /*tolerate_eexist=*/true, tolerate_enoent);
for (size_t len = 0, idx = page_idx; len < length; idx++, len += gPageSize) {
moving_pages_status_[idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
std::memory_order_release);
}
}
return;
}
uint32_t raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
uint32_t backoff_count = 0;
PageState state;
while (true) {
state = GetPageStateFromWord(raw_state);
if (state == PageState::kProcessing || state == PageState::kMutatorProcessing ||
state == PageState::kProcessingAndMapping || state == PageState::kProcessedAndMapping) {
// Wait for the page to be mapped (by gc-thread or some mutator) before returning.
// The wait is not expected to be long as the read state indicates that the other
// thread is actively working on the page.
BackOff(backoff_count++);
raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
} else if (state == PageState::kProcessedAndMapped) {
// Nothing to do.
break;
} else {
// Acquire order to ensure we don't start writing to a page, which could
// be shared, before the CAS is successful.
if (state == PageState::kUnprocessed &&
moving_pages_status_[page_idx].compare_exchange_strong(
raw_state,
static_cast<uint8_t>(PageState::kMutatorProcessing),
std::memory_order_acquire)) {
if (fault_page < black_dense_end_) {
if (use_generational_) {
UpdateNonMovingPage</*kSetupForGenerational=*/true>(
first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
} else {
UpdateNonMovingPage</*kSetupForGenerational=*/false>(
first_obj, fault_page, from_space_slide_diff_, moving_space_bitmap_);
}
buf = fault_page + from_space_slide_diff_;
} else {
if (UNLIKELY(buf == nullptr)) {
uint16_t idx = compaction_buffer_counter_.fetch_add(1, std::memory_order_relaxed);
// The buffer-map is one page bigger as the first buffer is used by GC-thread.
CHECK_LE(idx, kMutatorCompactionBufferCount);
buf = compaction_buffers_map_.Begin() + idx * gPageSize;
DCHECK(compaction_buffers_map_.HasAddress(buf));
self->SetThreadLocalGcBuffer(buf);
}
if (fault_page < post_compact_end_) {
// The page has to be compacted.
if (use_generational_ && fault_page < mid_gen_end_) {
CompactPage</*kSetupGenerational=*/true>(first_obj,
pre_compact_offset_moving_space_[page_idx],
buf,
fault_page,
/*needs_memset_zero=*/true);
} else {
CompactPage</*kSetupGenerational=*/false>(first_obj,
pre_compact_offset_moving_space_[page_idx],
buf,
fault_page,
/*needs_memset_zero=*/true);
}
} else {
DCHECK_NE(first_obj, nullptr);
DCHECK_GT(pre_compact_offset_moving_space_[page_idx], 0u);
uint8_t* pre_compact_page = black_allocations_begin_ + (fault_page - post_compact_end_);
uint32_t first_chunk_size = black_alloc_pages_first_chunk_size_[page_idx];
mirror::Object* next_page_first_obj = nullptr;
if (page_idx + 1 < moving_first_objs_count_ + black_page_count_) {
next_page_first_obj = first_objs_moving_space_[page_idx + 1].AsMirrorPtr();
}
DCHECK(IsAlignedParam(pre_compact_page, gPageSize));
SlideBlackPage(first_obj,
next_page_first_obj,
first_chunk_size,
pre_compact_page,
buf,
/*needs_memset_zero=*/true);
}
}
// Nobody else would simultaneously modify this page's state so an
// atomic store is sufficient. Use 'release' order to guarantee that
// loads/stores to the page are finished before this store. Since the
// mutator used its own buffer for the processing, there is no reason to
// put its index in the status of the page. Also, the mutator is going
// to immediately map the page, so that info is not needed.
moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapping),
std::memory_order_release);
CopyIoctl(fault_page, buf, gPageSize, /*return_on_contention=*/false, tolerate_enoent);
// Store is sufficient as no other thread modifies the status at this stage.
moving_pages_status_[page_idx].store(static_cast<uint8_t>(PageState::kProcessedAndMapped),
std::memory_order_release);
break;
}
state = GetPageStateFromWord(raw_state);
if (state == PageState::kProcessed) {
size_t arr_len = moving_first_objs_count_ + black_page_count_;
// The page is processed but not mapped. We should map it. The release
// order used in MapMovingSpacePages will ensure that the increment to
// moving_compaction_in_progress is done first.
if (MapMovingSpacePages(page_idx,
arr_len,
/*from_fault=*/true,
/*return_on_contention=*/false,
tolerate_enoent) > 0) {
break;
}
raw_state = moving_pages_status_[page_idx].load(std::memory_order_acquire);
}
}
}
}
bool MarkCompact::MapUpdatedLinearAllocPages(uint8_t* start_page,
uint8_t* start_shadow_page,
Atomic<PageState>* state,
size_t length,
bool free_pages,
bool single_ioctl,
bool tolerate_enoent) {
DCHECK_ALIGNED_PARAM(length, gPageSize);
Atomic<PageState>* madv_state = state;
size_t madv_len = length;
uint8_t* madv_start = start_shadow_page;
bool check_state_for_madv = false;
uint8_t* end_page = start_page + length;
while (start_page < end_page) {
size_t map_len = 0;
// Find a contiguous range of pages that we can map in single ioctl.
for (Atomic<PageState>* cur_state = state;
map_len < length && cur_state->load(std::memory_order_acquire) == PageState::kProcessed;
map_len += gPageSize, cur_state++) {
// No body.
}
if (map_len == 0) {
if (single_ioctl) {
return state->load(std::memory_order_relaxed) == PageState::kProcessedAndMapped;
}
// Skip all the pages that this thread can't map.
while (length > 0) {
PageState s = state->load(std::memory_order_relaxed);
if (s == PageState::kProcessed) {
break;
}
// If we find any page which is being processed or mapped (only possible by mutators)
// then we need to re-check the page-state and, if needed, wait for the state to change
// to 'mapped', before the shadow pages are reclaimed.
check_state_for_madv |= s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped;
state++;
length -= gPageSize;
start_shadow_page += gPageSize;
start_page += gPageSize;
}
} else {
map_len = CopyIoctl(start_page,
start_shadow_page,
map_len,
/*return_on_contention=*/false,
tolerate_enoent);
DCHECK_NE(map_len, 0u);
// Declare that the pages are ready to be accessed. Store is sufficient
// as any thread will be storing the same value.
for (size_t l = 0; l < map_len; l += gPageSize, state++) {
PageState s = state->load(std::memory_order_relaxed);
DCHECK(s == PageState::kProcessed || s == PageState::kProcessedAndMapped) << "state:" << s;
state->store(PageState::kProcessedAndMapped, std::memory_order_release);
}
if (single_ioctl) {
break;
}
start_page += map_len;
start_shadow_page += map_len;
length -= map_len;
// state is already updated above.
}
}
if (free_pages) {
if (check_state_for_madv) {
// Wait until all the pages are mapped before releasing them. This is needed to be
// checked only if some mutators were found to be concurrently mapping pages earlier.
for (size_t l = 0; l < madv_len; l += gPageSize, madv_state++) {
uint32_t backoff_count = 0;
PageState s = madv_state->load(std::memory_order_relaxed);
while (s > PageState::kUnprocessed && s < PageState::kProcessedAndMapped) {
BackOff(backoff_count++);
s = madv_state->load(std::memory_order_relaxed);
}
}
}
ZeroAndReleaseMemory(madv_start, madv_len);
}
return true;
}
void MarkCompact::ConcurrentlyProcessLinearAllocPage(uint8_t* fault_page, bool tolerate_enoent) {
auto arena_iter = linear_alloc_arenas_.end();
{
TrackedArena temp_arena(fault_page);
arena_iter = linear_alloc_arenas_.upper_bound(&temp_arena);
arena_iter = arena_iter != linear_alloc_arenas_.begin() ? std::prev(arena_iter)
: linear_alloc_arenas_.end();
}
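// arena_iter now refers to the arena with the largest begin address that is
// <= fault_page, or end() if fault_page precedes all tracked arenas.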
// Unlike ProcessLinearAlloc(), we don't need to hold arena-pool's lock here
// because a thread trying to access the page and as a result causing this
// userfault confirms that nobody can delete the corresponding arena and
// release its pages.
// NOTE: We may have some memory range recycled several times during a
// compaction cycle, thereby potentially causing userfaults on the same page
// several times. That's not a problem as all of them (except for possibly the
// first one) would require us to map a zero-page, which we do without updating
// the 'state_arr'.
if (arena_iter == linear_alloc_arenas_.end() ||
arena_iter->first->IsWaitingForDeletion() ||
arena_iter->second <= fault_page) {
// Fault page isn't in any of the arenas that existed before we started
// compaction. So map zeropage and return.
ZeropageIoctl(fault_page, gPageSize, /*tolerate_eexist=*/true, tolerate_enoent);
} else {
// Find the linear-alloc space containing fault-page
LinearAllocSpaceData* space_data = nullptr;
for (auto& data : linear_alloc_spaces_data_) {
if (data.begin_ <= fault_page && fault_page < data.end_) {
space_data = &data;
break;
}
}
DCHECK_NE(space_data, nullptr);
ptrdiff_t diff = space_data->shadow_.Begin() - space_data->begin_;
size_t page_idx = DivideByPageSize(fault_page - space_data->begin_);
Atomic<PageState>* state_arr =
reinterpret_cast<Atomic<PageState>*>(space_data->page_status_map_.Begin());
PageState state = state_arr[page_idx].load(std::memory_order_acquire);
uint32_t backoff_count = 0;
while (true) {
switch (state) {
case PageState::kUnprocessed: {
// Acquire order to ensure we don't start writing to shadow map, which is
// shared, before the CAS is successful.
if (state_arr[page_idx].compare_exchange_strong(
state, PageState::kProcessing, std::memory_order_acquire)) {
LinearAllocPageUpdater updater(this);
uint8_t* first_obj = arena_iter->first->GetFirstObject(fault_page);
// null first_obj indicates that it's a page from arena for
// intern-table/class-table. So first object isn't required.
if (first_obj != nullptr) {
updater.MultiObjectArena(fault_page + diff, first_obj + diff);
} else {
updater.SingleObjectArena(fault_page + diff, gPageSize);
}
if (updater.WasLastPageTouched()) {
state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
state = PageState::kProcessed;
continue;
} else {
// If the page wasn't touched, then it means it is empty and
// is most likely not present on the shadow-side. Furthermore,
// since the shadow is also userfaultfd registered, the copy
// ioctl fails as the copy-from-user in the kernel will cause a
// userfault. Instead, just map a zeropage, which is not only
// correct but also efficient as it avoids unnecessary memcpy
// in the kernel.
if (ZeropageIoctl(fault_page,
gPageSize,
/*tolerate_eexist=*/false,
tolerate_enoent)) {
state_arr[page_idx].store(PageState::kProcessedAndMapped,
std::memory_order_release);
}
return;
}
}
}
continue;
case PageState::kProcessed:
// Map as many pages as possible in a single ioctl, without spending
// time freeing pages.
if (MapUpdatedLinearAllocPages(fault_page,
fault_page + diff,
state_arr + page_idx,
space_data->end_ - fault_page,
/*free_pages=*/false,
/*single_ioctl=*/true,
tolerate_enoent)) {
return;
}
// fault_page was not mapped by this thread (some other thread claimed
// it). Wait for it to be mapped before returning.
FALLTHROUGH_INTENDED;
case PageState::kProcessing:
case PageState::kProcessingAndMapping:
case PageState::kProcessedAndMapping:
// Wait for the page to be mapped before returning.
BackOff(backoff_count++);
state = state_arr[page_idx].load(std::memory_order_acquire);
continue;
case PageState::kMutatorProcessing:
LOG(FATAL) << "Unreachable";
UNREACHABLE();
case PageState::kProcessedAndMapped:
// Somebody else took care of the page.
return;
}
break;
}
}
}
void MarkCompact::ProcessLinearAlloc() {
GcVisitedArenaPool* arena_pool =
static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
DCHECK_EQ(thread_running_gc_, Thread::Current());
uint8_t* unmapped_range_start = nullptr;
uint8_t* unmapped_range_end = nullptr;
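// [unmapped_range_start, unmapped_range_end) is the contiguous run of arena
// pages visited so far whose updated contents have not yet been mapped back
// from the shadow (see map_pages() below).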
// Pointer to the linear-alloc space containing the current arena in the loop
// below. Also helps in ensuring that two arenas, which are contiguous in
// address space but are from different linear-alloc spaces, are not coalesced
// into one range for mapping purpose.
LinearAllocSpaceData* space_data = nullptr;
Atomic<PageState>* state_arr = nullptr;
ptrdiff_t diff = 0;
auto map_pages = [&]() {
DCHECK_NE(diff, 0);
DCHECK_NE(space_data, nullptr);
DCHECK_GE(unmapped_range_start, space_data->begin_);
DCHECK_LT(unmapped_range_start, space_data->end_);
DCHECK_GT(unmapped_range_end, space_data->begin_);
DCHECK_LE(unmapped_range_end, space_data->end_);
DCHECK_LT(unmapped_range_start, unmapped_range_end);
DCHECK_ALIGNED_PARAM(unmapped_range_end - unmapped_range_start, gPageSize);
size_t page_idx = DivideByPageSize(unmapped_range_start - space_data->begin_);
MapUpdatedLinearAllocPages(unmapped_range_start,
unmapped_range_start + diff,
state_arr + page_idx,
unmapped_range_end - unmapped_range_start,
/*free_pages=*/true,
/*single_ioctl=*/false,
/*tolerate_enoent=*/false);
};
for (auto& pair : linear_alloc_arenas_) {
const TrackedArena* arena = pair.first;
size_t arena_size = arena->Size();
uint8_t* arena_begin = arena->Begin();
// linear_alloc_arenas_ is sorted on arena-begin. So we will get all arenas
// in that order.
DCHECK_LE(unmapped_range_end, arena_begin);
DCHECK(space_data == nullptr || arena_begin > space_data->begin_)
<< "space-begin:" << static_cast<void*>(space_data->begin_)
<< " arena-begin:" << static_cast<void*>(arena_begin);
if (space_data == nullptr || space_data->end_ <= arena_begin) {
// Map the processed arenas as we are switching to another space.
if (space_data != nullptr && unmapped_range_end != nullptr) {
map_pages();
unmapped_range_end = nullptr;
}
// Find the linear-alloc space containing the arena
LinearAllocSpaceData* curr_space_data = space_data;
for (auto& data : linear_alloc_spaces_data_) {
if (data.begin_ <= arena_begin && arena_begin < data.end_) {
// Since arenas are sorted, the next space should be higher in address
// order than the current one.
DCHECK(space_data == nullptr || data.begin_ >= space_data->end_);
diff = data.shadow_.Begin() - data.begin_;
state_arr = reinterpret_cast<Atomic<PageState>*>(data.page_status_map_.Begin());
space_data = &data;
break;
}
}
CHECK_NE(space_data, curr_space_data)
<< "Couldn't find space for arena-begin:" << static_cast<void*>(arena_begin);
}
// Map the processed arenas if we found a hole within the current space.
if (unmapped_range_end != nullptr && unmapped_range_end < arena_begin) {
map_pages();
unmapped_range_end = nullptr;
}
if (unmapped_range_end == nullptr) {
unmapped_range_start = unmapped_range_end = arena_begin;
}
DCHECK_NE(unmapped_range_start, nullptr);
// It's ok to include all arenas in the unmapped range. Since the
// corresponding state bytes will be kUnprocessed, we will skip calling
// ioctl and madvise on arenas which are waiting to be deleted.
unmapped_range_end += arena_size;
{
// Acquire arena-pool's lock (in shared-mode) so that the arena being updated
// does not get deleted at the same time. If this critical section is too
// long and impacts mutator response time, then we can get rid of this lock by
// holding onto memory ranges of all arenas deleted since the compaction pause
// until compaction finishes.
ReaderMutexLock rmu(thread_running_gc_, arena_pool->GetLock());
// If any arenas were freed since compaction pause then skip them from
// visiting.
if (arena->IsWaitingForDeletion()) {
continue;
}
uint8_t* last_byte = pair.second;
DCHECK_ALIGNED_PARAM(last_byte, gPageSize);
auto visitor = [space_data, last_byte, diff, this, state_arr](
uint8_t* page_begin,
uint8_t* first_obj,
size_t page_size) REQUIRES_SHARED(Locks::mutator_lock_) {
// No need to process pages past last_byte as they already have updated
// gc-roots, if any.
if (page_begin >= last_byte) {
return;
}
LinearAllocPageUpdater updater(this);
size_t page_idx = DivideByPageSize(page_begin - space_data->begin_);
DCHECK_LT(page_idx, space_data->page_status_map_.Size());
PageState expected_state = PageState::kUnprocessed;
// Acquire order to ensure that we don't start accessing the shadow page,
// which is shared with other threads, prior to the CAS. Also, for the same
// reason, we use 'release' order for changing the state to 'processed'.
if (state_arr[page_idx].compare_exchange_strong(
expected_state, PageState::kProcessing, std::memory_order_acquire)) {
// null first_obj indicates that it's a page from arena for
// intern-table/class-table. So first object isn't required.
if (first_obj != nullptr) {
updater.MultiObjectArena(page_begin + diff, first_obj + diff);
} else {
DCHECK_EQ(page_size, gPageSize);
updater.SingleObjectArena(page_begin + diff, page_size);
}
expected_state = PageState::kProcessing;
// Store is sufficient as no other thread could be modifying it. Use
// release order to ensure that the writes to the shadow page are
// committed to memory before the state change becomes visible.
if (updater.WasLastPageTouched()) {
state_arr[page_idx].store(PageState::kProcessed, std::memory_order_release);
} else {
// See the comment in ConcurrentlyProcessLinearAllocPage() for the same situation.
ZeropageIoctl(
page_begin, gPageSize, /*tolerate_eexist=*/false, /*tolerate_enoent=*/false);
// Ioctl will act as release fence.
state_arr[page_idx].store(PageState::kProcessedAndMapped, std::memory_order_release);
}
}
};
arena->VisitRoots(visitor);
}
}
if (unmapped_range_end > unmapped_range_start) {
// Map remaining pages.
map_pages();
}
}
void MarkCompact::RegisterUffd(void* addr, size_t size) {
DCHECK(IsValidFd(uffd_));
struct uffdio_register uffd_register;
uffd_register.range.start = reinterpret_cast<uintptr_t>(addr);
uffd_register.range.len = size;
uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
CHECK_EQ(ioctl(uffd_, UFFDIO_REGISTER, &uffd_register), 0)
<< "ioctl_userfaultfd: register failed: " << strerror(errno)
<< ". start:" << static_cast<void*>(addr) << " len:" << PrettySize(size);
}
// TODO: at some point we may want to tolerate certain error conditions (like ENOMEM
// when we unregister the unused portion of the moving-space). Implement support
// for that.
void MarkCompact::UnregisterUffd(uint8_t* start, size_t len) {
DCHECK(IsValidFd(uffd_));
struct uffdio_range range;
range.start = reinterpret_cast<uintptr_t>(start);
range.len = len;
CHECK_EQ(ioctl(uffd_, UFFDIO_UNREGISTER, &range), 0)
<< "ioctl_userfaultfd: unregister failed: " << strerror(errno)
<< ". addr:" << static_cast<void*>(start) << " len:" << PrettySize(len);
}
void MarkCompact::CompactionPhase() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
{
int32_t freed_bytes = black_objs_slide_diff_;
bump_pointer_space_->RecordFree(freed_objects_, freed_bytes);
RecordFree(ObjectBytePair(freed_objects_, freed_bytes));
}
CompactMovingSpace<kCopyMode>(compaction_buffers_map_.Begin());
ProcessLinearAlloc();
auto wait_for_compaction_counter = [this](size_t idx) {
SigbusCounterType count = sigbus_in_progress_count_[idx].fetch_or(
kSigbusCounterCompactionDoneMask, std::memory_order_acq_rel);
// Wait for SIGBUS handlers already in play.
for (uint32_t i = 0; count > 0; i++) {
BackOff(i);
count = sigbus_in_progress_count_[idx].load(std::memory_order_acquire);
count &= ~kSigbusCounterCompactionDoneMask;
}
};
// Set compaction-done bit in the first counter to indicate that gc-thread
// is done compacting and mutators should stop incrementing this counter.
// Mutator should tolerate ENOENT after this. This helps avoid priority
// inversion in case mutators need to map zero-pages after compaction is
// finished but before gc-thread manages to unregister the spaces.
wait_for_compaction_counter(0);
// Unregister moving-space
size_t moving_space_size = bump_pointer_space_->Capacity();
size_t used_size = (moving_first_objs_count_ + black_page_count_) * gPageSize;
if (used_size > 0) {
UnregisterUffd(bump_pointer_space_->Begin(), used_size);
}
// Unregister linear-alloc spaces
for (auto& data : linear_alloc_spaces_data_) {
DCHECK_EQ(data.end_ - data.begin_, static_cast<ssize_t>(data.shadow_.Size()));
UnregisterUffd(data.begin_, data.shadow_.Size());
}
GetCurrentIteration()->SetAppSlowPathDurationMs(MilliTime() - app_slow_path_start_time_);
// Set compaction-done bit in the second counter to indicate that gc-thread
// is done unregistering the spaces and therefore mutators, if in SIGBUS,
// should return without attempting to map the faulted page. When the mutator
// will access the address again, it will succeed. Once this counter is 0,
// the gc-thread can safely initialize/madvise the data structures.
wait_for_compaction_counter(1);
// Release all of the memory taken by moving-space's from-map
from_space_map_.MadviseDontNeedAndZero();
// mprotect(PROT_NONE) all maps except to-space in debug-mode to catch any unexpected accesses.
DCHECK_EQ(mprotect(from_space_begin_, moving_space_size, PROT_NONE), 0)
<< "mprotect(PROT_NONE) for from-space failed: " << strerror(errno);
// madvise the linear-alloc spaces' page-status arrays. Note that we don't need to
// madvise the shadow-map as the pages from it were reclaimed in
// ProcessLinearAlloc() after arenas were mapped.
for (auto& data : linear_alloc_spaces_data_) {
data.page_status_map_.MadviseDontNeedAndZero();
}
}
template <size_t kBufferSize>
class MarkCompact::ThreadRootsVisitor : public RootVisitor {
public:
using RefType = StackReference<mirror::Object>;
explicit ThreadRootsVisitor(MarkCompact* mark_compact, Thread* const self)
: mark_compact_(mark_compact), self_(self) {
if (kVerifyGcRootDuringMarking) {
verification_ = mark_compact->GetHeap()->GetVerification();
}
}
~ThreadRootsVisitor() {
if (overflow_arr_start_ != nullptr) {
// Pass on the thread-local overflow array to the gc-thread for processing
// after checkpoint.
CHECK_GT(top_, overflow_arr_start_);
auto pair = std::make_pair(overflow_arr_start_, top_ - overflow_arr_start_);
MutexLock mu(self_, mark_compact_->lock_);
if (mark_compact_->overflow_arrays_ == nullptr) {
mark_compact_->overflow_arrays_ = new std::vector<std::pair<RefType*, size_t>>(1, pair);
} else {
mark_compact_->overflow_arrays_->push_back(pair);
}
} else {
// Since we don't reset mark-stack between the two stack-scan checkpoints
// in marking phase, we need to clear the stale references that are left
// unused in the stack.
for (; top_ < end_; top_++) {
top_->Assign(nullptr);
}
}
}
void VisitRoots(mirror::Object*** roots,
size_t count,
[[maybe_unused]] const RootInfo& info) override
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
for (size_t i = 0; i < count; i++) {
mirror::Object* obj = *roots[i];
if (kVerifyGcRootDuringMarking) {
CHECK(verification_->IsValidObject(obj)) << obj;
}
if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
Push(obj);
}
}
}
void VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
size_t count,
[[maybe_unused]] const RootInfo& info) override
REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
for (size_t i = 0; i < count; i++) {
mirror::Object* obj = roots[i]->AsMirrorPtr();
if (kVerifyGcRootDuringMarking) {
CHECK(verification_->IsValidObject(obj)) << obj;
}
if (mark_compact_->MarkObjectNonNullNoPush</*kParallel*/true>(obj)) {
Push(obj);
}
}
}
private:
void FetchBuffer() REQUIRES_SHARED(Locks::mutator_lock_) {
size_t requested_size;
ptrdiff_t new_top_offset;
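// First try to grab a kBufferSize-slot window directly from the shared
// mark-stack. Once the mark-stack is full, fall back to a thread-local
// overflow array, which is doubled on every subsequent refill.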
if (LIKELY(overflow_arr_start_ == nullptr)) {
// During stack scanning threads can only be calling AtomicBumpBack() on
// the mark-stack.
if (mark_compact_->mark_stack_->AtomicBumpBack(kBufferSize, &top_, &end_)) {
return;
}
new_top_offset = 0;
requested_size = kBufferSize;
} else {
DCHECK_GT(end_, overflow_arr_start_);
new_top_offset = end_ - overflow_arr_start_;
requested_size = 2 * new_top_offset;
}
// realloc() acts like malloc() when overflow_arr_start_ is null.
overflow_arr_start_ =
static_cast<RefType*>(realloc(overflow_arr_start_, requested_size * sizeof(RefType)));
top_ = overflow_arr_start_ + new_top_offset;
end_ = overflow_arr_start_ + requested_size;
}
void Push(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_)
REQUIRES(Locks::heap_bitmap_lock_) {
if (UNLIKELY(top_ == end_)) {
FetchBuffer();
DCHECK_GE(end_ - top_, static_cast<ssize_t>(kBufferSize));
}
top_->Assign(obj);
top_++;
}
// If mark-stack has slots available, [top_, end_) represents the slots
// acquired from the mark-stack for storing references. After mark-stack
// is full, [top_, end_) is the range in overflow array.
RefType* top_ = nullptr;
RefType* end_ = nullptr;
// Thread-local array of references to be used if and when mark-stack is full.
RefType* overflow_arr_start_ = nullptr;
MarkCompact* const mark_compact_;
Thread* const self_;
const Verification* verification_;
};
class MarkCompact::CheckpointMarkThreadRoots : public Closure {
public:
explicit CheckpointMarkThreadRoots(MarkCompact* mark_compact) : mark_compact_(mark_compact) {}
void Run(Thread* thread) override NO_THREAD_SAFETY_ANALYSIS {
ScopedTrace trace("Marking thread roots");
// Note: self is not necessarily equal to thread since thread may be
// suspended.
Thread* const self = Thread::Current();
CHECK(thread == self
|| thread->IsSuspended()
|| thread->GetState() == ThreadState::kWaitingPerformingGc)
<< thread->GetState() << " thread " << thread << " self " << self;
{
ThreadRootsVisitor</*kBufferSize*/ 20> visitor(mark_compact_, self);
thread->VisitRoots(&visitor, kVisitRootFlagAllRoots);
}
// Clear page-buffer to prepare for compaction phase.
thread->SetThreadLocalGcBuffer(nullptr);
// If thread is a running mutator, then act on behalf of the garbage
// collector. See the code in ThreadList::RunCheckpoint.
mark_compact_->GetBarrier().Pass(self);
}
private:
MarkCompact* const mark_compact_;
};
inline void MarkCompact::ProcessMarkObject(mirror::Object* obj) {
DCHECK(obj != nullptr);
ScanObject</*kUpdateLiveWords=*/true>(obj);
}
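// Drains the mark-stack while skipping null entries, which are left behind by
// per-thread buffers that were not completely filled during the checkpoint.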
void MarkCompact::ProcessMarkStackNonNull() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
while (!mark_stack_->IsEmpty()) {
mirror::Object* obj = mark_stack_->PopBack();
if (obj != nullptr) {
ProcessMarkObject(obj);
}
}
}
void MarkCompact::MarkRootsCheckpoint(Thread* self, Runtime* runtime) {
// We revoke TLABs later during the paused round of marking.
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
CheckpointMarkThreadRoots check_point(this);
ThreadList* thread_list = runtime->GetThreadList();
gc_barrier_.Init(self, 0);
// Request that the checkpoint be run on all threads, returning a count of the threads that
// must run through the barrier, including self.
size_t barrier_count = thread_list->RunCheckpoint(&check_point);
// Release locks then wait for all mutator threads to pass the barrier.
// If there are no threads to wait for, which implies that all the checkpoint functions have
// finished, then there is no need to release the locks.
if (barrier_count > 0) {
Locks::heap_bitmap_lock_->ExclusiveUnlock(self);
Locks::mutator_lock_->SharedUnlock(self);
{
ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
gc_barrier_.Increment(self, barrier_count);
}
Locks::mutator_lock_->SharedLock(self);
Locks::heap_bitmap_lock_->ExclusiveLock(self);
}
// We may have nulls in the mark-stack as some thread(s) may not have filled
// their buffers completely.
ProcessMarkStackNonNull();
std::vector<std::pair<StackReference<mirror::Object>*, size_t>>* vec = nullptr;
{
MutexLock mu(self, lock_);
if (overflow_arrays_ != nullptr) {
vec = overflow_arrays_;
overflow_arrays_ = nullptr;
}
}
if (vec != nullptr) {
for (auto [arr, size] : *vec) {
for (size_t i = 0; i < size; i++) {
ProcessMarkObject(arr[i].AsMirrorPtr());
}
free(arr);
ProcessMarkStack();
}
delete vec;
}
}
void MarkCompact::MarkNonThreadRoots(Runtime* runtime) {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
runtime->VisitNonThreadRoots(this);
ProcessMarkStack();
}
void MarkCompact::MarkConcurrentRoots(VisitRootFlags flags, Runtime* runtime) {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
runtime->VisitConcurrentRoots(this, flags);
ProcessMarkStack();
}
void MarkCompact::RevokeAllThreadLocalBuffers() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
bump_pointer_space_->RevokeAllThreadLocalBuffers();
}
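// Functor used by card-table and mod-union-table scans to mark the references
// held by a single object, without updating the live-words bitmap.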
class MarkCompact::ScanObjectVisitor {
public:
explicit ScanObjectVisitor(MarkCompact* const mark_compact) ALWAYS_INLINE
: mark_compact_(mark_compact) {}
void operator()(ObjPtr<mirror::Object> obj) const
ALWAYS_INLINE
REQUIRES(Locks::heap_bitmap_lock_)
REQUIRES_SHARED(Locks::mutator_lock_) {
mark_compact_->ScanObject</*kUpdateLiveWords*/ false>(obj.Ptr());
}
private:
MarkCompact* const mark_compact_;
};
void MarkCompact::UpdateAndMarkModUnion() {
accounting::CardTable* const card_table = heap_->GetCardTable();
for (const auto& space : immune_spaces_.GetSpaces()) {
const char* name = space->IsZygoteSpace()
? "UpdateAndMarkZygoteModUnionTable"
: "UpdateAndMarkImageModUnionTable";
DCHECK(space->IsZygoteSpace() || space->IsImageSpace()) << *space;
TimingLogger::ScopedTiming t(name, GetTimings());
accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
if (table != nullptr) {
// UpdateAndMarkReferences() doesn't visit Reference-type objects. But
// that's fine because these objects are immutable enough (referent can
// only be cleared) and hence the only referents they can have are intra-space.
table->UpdateAndMarkReferences(this);
} else {
// No mod-union table, scan all dirty/aged cards in the corresponding
// card-table. This can only occur for app images.
card_table->Scan</*kClearCard*/ false>(space->GetMarkBitmap(),
space->Begin(),
space->End(),
ScanObjectVisitor(this),
gc::accounting::CardTable::kCardAged);
}
}
}
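// Scans objects on aged cards (kCardAged2) in the old-gen portion of the moving
// space and in the non-moving space, so that references from old objects into
// younger generations are traced during a young GC.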
void MarkCompact::ScanOldGenObjects() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
accounting::CardTable* const card_table = heap_->GetCardTable();
// Moving space
card_table->Scan</*kClearCard=*/false>(moving_space_bitmap_,
moving_space_begin_,
old_gen_end_,
ScanObjectVisitor(this),
gc::accounting::CardTable::kCardAged2);
ProcessMarkStack();
// Non-moving space
card_table->Scan</*kClearCard=*/false>(non_moving_space_bitmap_,
non_moving_space_->Begin(),
non_moving_space_->End(),
ScanObjectVisitor(this),
gc::accounting::CardTable::kCardAged2);
ProcessMarkStack();
}
void MarkCompact::MarkReachableObjects() {
UpdateAndMarkModUnion();
// Recursively mark all the non-image bits set in the mark bitmap.
ProcessMarkStack();
if (young_gen_) {
// For an object overlapping the old-gen boundary, we need to visit it to
// make sure that we don't miss the references in the mid-gen area, and to
// also update the corresponding liveness info.
if (old_gen_end_ > moving_space_begin_) {
uintptr_t old_gen_end = reinterpret_cast<uintptr_t>(old_gen_end_);
mirror::Object* obj = moving_space_bitmap_->FindPrecedingObject(old_gen_end - kAlignment);
if (obj != nullptr) {
size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
if (reinterpret_cast<uintptr_t>(obj) + RoundUp(obj_size, kAlignment) > old_gen_end) {
ScanObject</*kUpdateLiveWords=*/true>(obj);
}
}
}
ScanOldGenObjects();
}
}
void MarkCompact::ScanDirtyObjects(bool paused, uint8_t minimum_age) {
accounting::CardTable* card_table = heap_->GetCardTable();
for (const auto& space : heap_->GetContinuousSpaces()) {
const char* name = nullptr;
switch (space->GetGcRetentionPolicy()) {
case space::kGcRetentionPolicyNeverCollect:
name = paused ? "(Paused)ScanGrayImmuneSpaceObjects" : "ScanGrayImmuneSpaceObjects";
break;
case space::kGcRetentionPolicyFullCollect:
name = paused ? "(Paused)ScanGrayZygoteSpaceObjects" : "ScanGrayZygoteSpaceObjects";
break;
case space::kGcRetentionPolicyAlwaysCollect:
DCHECK(space == bump_pointer_space_ || space == non_moving_space_);
name = paused ? "(Paused)ScanGrayAllocSpaceObjects" : "ScanGrayAllocSpaceObjects";
break;
}
TimingLogger::ScopedTiming t(name, GetTimings());
if (paused && use_generational_ &&
space->GetGcRetentionPolicy() == space::kGcRetentionPolicyAlwaysCollect) {
DCHECK_EQ(minimum_age, accounting::CardTable::kCardDirty);
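// Age dirty cards (kCardDirty -> kCardAged) in alloc spaces while scanning
// them during this paused round of a generational collection.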
auto mod_visitor = [](uint8_t* card, uint8_t cur_val) {
DCHECK_EQ(cur_val, accounting::CardTable::kCardDirty);
*card = accounting::CardTable::kCardAged;
};
card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(),
space->Begin(),
space->End(),
ScanObjectVisitor(this),
mod_visitor,
minimum_age);
} else {
card_table->Scan</*kClearCard=*/false>(space->GetMarkBitmap(),
space->Begin(),
space->End(),
ScanObjectVisitor(this),
minimum_age);
}
ProcessMarkStack();
}
}
void MarkCompact::RecursiveMarkDirtyObjects(bool paused, uint8_t minimum_age) {
ScanDirtyObjects(paused, minimum_age);
CHECK(mark_stack_->IsEmpty());
}
void MarkCompact::MarkRoots(VisitRootFlags flags) {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
Runtime* runtime = Runtime::Current();
// Make sure that the checkpoint which collects the stack roots is the first
// one capturing GC-roots. It is supposed to find the address after which
// everything allocated (during this marking phase) will be considered
// 'marked'.
MarkRootsCheckpoint(thread_running_gc_, runtime);
MarkNonThreadRoots(runtime);
MarkConcurrentRoots(flags, runtime);
}
void MarkCompact::PreCleanCards() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
CHECK(!Locks::mutator_lock_->IsExclusiveHeld(thread_running_gc_));
// Age the card-table before thread stack scanning checkpoint in MarkRoots()
// as it ensures that there are no in-progress write barriers which started
// prior to aging the card-table.
PrepareForMarking(/*pre_marking=*/false);
MarkRoots(static_cast<VisitRootFlags>(kVisitRootFlagClearRootLog | kVisitRootFlagNewRoots));
RecursiveMarkDirtyObjects(/*paused*/ false, accounting::CardTable::kCardDirty - 1);
}
// In a concurrent marking algorithm, if we are not using a write/read barrier, as
// in this case, then we need a stop-the-world (STW) round in the end to mark
// objects which were written into concurrently while concurrent marking was
// performed.
// In order to minimize the pause time, we could take one of two approaches:
// 1. Keep repeating concurrent marking of dirty cards until the time spent goes
// below a threshold.
// 2. Do two rounds concurrently and then attempt a paused one. If we figure
// that it's taking too long, then resume mutators and retry.
//
// Given the non-trivial fixed overhead of running a round (card table and root
// scan), it might be better to go with approach 2.
void MarkCompact::MarkingPhase() {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
DCHECK_EQ(thread_running_gc_, Thread::Current());
WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
MaybeClampGcStructures();
PrepareForMarking(/*pre_marking=*/true);
MarkZygoteLargeObjects();
MarkRoots(
static_cast<VisitRootFlags>(kVisitRootFlagAllRoots | kVisitRootFlagStartLoggingNewRoots));
MarkReachableObjects();
// Pre-clean dirtied cards to reduce pauses.
PreCleanCards();
// Set up reference processing and forward soft references once before enabling
// the slow path (in MarkingPause).
ReferenceProcessor* rp = GetHeap()->GetReferenceProcessor();
bool clear_soft_references = GetCurrentIteration()->GetClearSoftReferences();
rp->Setup(thread_running_gc_, this, /*concurrent=*/ true, clear_soft_references);
if (!clear_soft_references) {
// Forward as many SoftReferences as possible before inhibiting reference access.
rp->ForwardSoftReferences(GetTimings());
}
}
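// Visitor that marks an object's reference fields, delays its referent if it
// is a reference object, and marks its native roots. In generational mode it
// also records whether any native root points into the (approximate) young
// generation, so that the holder's card can be dirtied later (see ScanObject).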
class MarkCompact::RefFieldsVisitor {
public:
ALWAYS_INLINE RefFieldsVisitor(MarkCompact* const mark_compact)
: mark_compact_(mark_compact),
young_gen_begin_(mark_compact->mid_gen_end_),
young_gen_end_(mark_compact->moving_space_end_),
dirty_card_(false),
// Ideally we should only check for objects outside young-gen. However,
// the boundary of young-gen can change later in PrepareForCompaction()
// as we need the mid-gen-end to be page-aligned. Since most of the
// objects don't have native-roots, it's not too costly to check all
// objects being visited during marking.
check_native_roots_to_young_gen_(mark_compact->use_generational_) {}
bool ShouldDirtyCard() const { return dirty_card_; }
ALWAYS_INLINE void operator()(mirror::Object* obj,
MemberOffset offset,
[[maybe_unused]] bool is_static) const
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
if (kCheckLocks) {
Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
}
mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
mark_compact_->MarkObject(ref, obj, offset);
}
void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const ALWAYS_INLINE
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
mark_compact_->DelayReferenceReferent(klass, ref);
}
void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const ALWAYS_INLINE
REQUIRES(Locks::heap_bitmap_lock_) REQUIRES_SHARED(Locks::mutator_lock_) {
if (!root->IsNull()) {
VisitRoot(root);
}
}
void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
REQUIRES(Locks::heap_bitmap_lock_)
REQUIRES_SHARED(Locks::mutator_lock_) {
if (kCheckLocks) {
Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
}
mirror::Object* ref = root->AsMirrorPtr();
mark_compact_->MarkObject(ref);
if (check_native_roots_to_young_gen_) {
dirty_card_ |= reinterpret_cast<uint8_t*>(ref) >= young_gen_begin_ &&
reinterpret_cast<uint8_t*>(ref) < young_gen_end_;
}
}
private:
MarkCompact* const mark_compact_;
uint8_t* const young_gen_begin_;
uint8_t* const young_gen_end_;
mutable bool dirty_card_;
const bool check_native_roots_to_young_gen_;
};
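// Returns the number of live bytes covered by the given chunk of the
// live-words bitmap, computed by popcounting its bitmap words.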
template <size_t kAlignment>
size_t MarkCompact::LiveWordsBitmap<kAlignment>::LiveBytesInBitmapWord(size_t chunk_idx) const {
const size_t index = chunk_idx * kBitmapWordsPerVectorWord;
size_t words = 0;
for (uint32_t i = 0; i < kBitmapWordsPerVectorWord; i++) {
words += POPCOUNT(Bitmap::Begin()[index + i]);
}
return words * kAlignment;
}
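// Sets the live-words bits for 'obj' and accounts its kAlignment-rounded size
// in the per-chunk liveness vector (chunk_info_vec_), splitting the size across
// chunks when the object spans a chunk boundary.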
void MarkCompact::UpdateLivenessInfo(mirror::Object* obj, size_t obj_size) {
DCHECK(obj != nullptr);
DCHECK_EQ(obj_size, obj->SizeOf<kDefaultVerifyFlags>());
uintptr_t obj_begin = reinterpret_cast<uintptr_t>(obj);
UpdateClassAfterObjectMap(obj);
size_t size = RoundUp(obj_size, kAlignment);
uintptr_t bit_index = live_words_bitmap_->SetLiveWords(obj_begin, size);
size_t chunk_idx =
(obj_begin - reinterpret_cast<uintptr_t>(moving_space_begin_)) / kOffsetChunkSize;
// Compute the bit-index within the chunk-info vector word.
bit_index %= kBitsPerVectorWord;
size_t first_chunk_portion = std::min(size, (kBitsPerVectorWord - bit_index) * kAlignment);
chunk_info_vec_[chunk_idx] += first_chunk_portion;
DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize)
<< "first_chunk_portion:" << first_chunk_portion
<< " obj-size:" << RoundUp(obj_size, kAlignment);
chunk_idx++;
DCHECK_LE(first_chunk_portion, size);
for (size -= first_chunk_portion; size > kOffsetChunkSize; size -= kOffsetChunkSize) {
DCHECK_EQ(chunk_info_vec_[chunk_idx], 0u);
chunk_info_vec_[chunk_idx++] = kOffsetChunkSize;
}
chunk_info_vec_[chunk_idx] += size;
DCHECK_LE(chunk_info_vec_[chunk_idx], kOffsetChunkSize)
<< "size:" << size << " obj-size:" << RoundUp(obj_size, kAlignment);
}
template <bool kUpdateLiveWords>
void MarkCompact::ScanObject(mirror::Object* obj) {
mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
// TODO(lokeshgidra): Remove the following condition once b/373609505 is fixed.
if (UNLIKELY(klass == nullptr)) {
// It was seen in the ConcurrentCopying GC that, after a small wait, reloading
// the class pointer turns up a valid class object. So as a workaround, we can
// continue execution and log an error that this happened.
for (size_t i = 0; i < 1000; i++) {
// Wait for 1ms at a time. Don't wait for more than 1 second in total.
usleep(1000);
klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
if (klass != nullptr) {
break;
}
}
if (klass == nullptr) {
// It must be heap corruption.
LOG(FATAL_WITHOUT_ABORT) << "klass pointer for obj: " << obj << " found to be null."
<< " black_dense_end: " << static_cast<void*>(black_dense_end_)
<< " mid_gen_end: " << static_cast<void*>(mid_gen_end_)
<< " prev_post_compact_end: " << prev_post_compact_end_
<< " prev_black_allocations_begin: " << prev_black_allocations_begin_
<< " prev_black_dense_end: " << prev_black_dense_end_
<< " prev_gc_young: " << prev_gc_young_
<< " prev_gc_performed_compaction: "
<< prev_gc_performed_compaction_;
heap_->GetVerification()->LogHeapCorruption(
obj, mirror::Object::ClassOffset(), klass, /*fatal=*/true);
}
}
// The size of `obj` is used both here (to update `bytes_scanned_`) and in
// `UpdateLivenessInfo`. As fetching this value can be expensive, do it once
// here and pass that information to `UpdateLivenessInfo`.
size_t obj_size = obj->SizeOf<kDefaultVerifyFlags>();
bytes_scanned_ += obj_size;
RefFieldsVisitor visitor(this);
DCHECK(IsMarked(obj)) << "Scanning unmarked object " << obj << "\n" << heap_->DumpSpaces();
if (kUpdateLiveWords && HasAddress(obj)) {
UpdateLivenessInfo(obj, obj_size);
freed_objects_--;
}
obj->VisitReferences(visitor, visitor);
// Old-gen cards for objects containing references to mid-gen need to be kept
// dirty for re-scanning in the next GC cycle. We take care of that mostly during
// the compaction phase, as that lets us implicitly take care of black-allocated
// objects as well. Unfortunately, since we don't visit native-roots during
// compaction, those have to be captured during marking.
//
// Note that we can't dirty the cards right away because then we would wrongly
// age them during the re-scan of this marking phase, and may thereby lose them
// by the end of the GC cycle.
if (visitor.ShouldDirtyCard()) {
dirty_cards_later_vec_.push_back(obj);
}
}
// Scan anything that's on the mark stack.
void MarkCompact::ProcessMarkStack() {
// TODO: eventually get rid of this as we now call this function quite a few times.
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
// TODO: try prefetch like in CMS
while (!mark_stack_->IsEmpty()) {
ProcessMarkObject(mark_stack_->PopBack());
}
}
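// Doubles the mark-stack's capacity, preserving the existing entries by
// copying them into the resized stack.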
void MarkCompact::ExpandMarkStack() {
const size_t new_size = mark_stack_->Capacity() * 2;
// TODO: We could reduce the overhead here by making the Resize() of
// AtomicStack take care of transferring references.
std::vector<StackReference<mirror::Object>> temp(mark_stack_->Begin(),
mark_stack_->End());
mark_stack_->Resize(new_size);
for (auto& ref : temp) {
mark_stack_->PushBack(ref.AsMirrorPtr());
}
DCHECK(!mark_stack_->IsFull());
}
inline void MarkCompact::PushOnMarkStack(mirror::Object* obj) {
if (UNLIKELY(mark_stack_->IsFull())) {
ExpandMarkStack();
}
mark_stack_->PushBack(obj);
}
inline void MarkCompact::MarkObjectNonNull(mirror::Object* obj,
mirror::Object* holder,
MemberOffset offset) {
DCHECK(obj != nullptr);
if (MarkObjectNonNullNoPush</*kParallel*/false>(obj, holder, offset)) {
PushOnMarkStack(obj);
}
}
template <bool kParallel>
inline bool MarkCompact::MarkObjectNonNullNoPush(mirror::Object* obj,
mirror::Object* holder,
MemberOffset offset) {
// We expect most of the references to be in the bump-pointer space, so try that
// first to keep the cost of this function minimal.
if (LIKELY(HasAddress(obj))) {
// If obj is in old-gen (during young-gc) then we shouldn't add it to
// mark-stack to limit marking to young generation.
if (young_gen_ && reinterpret_cast<uint8_t*>(obj) < old_gen_end_) {
DCHECK(moving_space_bitmap_->Test(obj));
return false;
}
return kParallel ? !moving_space_bitmap_->AtomicTestAndSet(obj)
: !moving_space_bitmap_->Set(obj);
} else if (non_moving_space_bitmap_->HasAddress(obj)) {
return kParallel ? !non_moving_space_bitmap_->AtomicTestAndSet(obj)
: !non_moving_space_bitmap_->Set(obj);
} else if (immune_spaces_.ContainsObject(obj)) {
DCHECK(IsMarked(obj) != nullptr);
return false;
} else {
// Must be a large-object space, otherwise it's a case of heap corruption.
if (!IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment())) {
// Objects in large-object space are aligned to the large-object alignment.
// So if we have an object which doesn't belong to any space and is not
// aligned to that alignment either, then it's memory corruption.
// TODO: implement protect/unprotect in bump-pointer space.
heap_->GetVerification()->LogHeapCorruption(holder, offset, obj, /*fatal*/ true);
}
DCHECK_NE(heap_->GetLargeObjectsSpace(), nullptr)
<< "ref=" << obj
<< " doesn't belong to any of the spaces and large object space doesn't exist";
accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
DCHECK(los_bitmap->HasAddress(obj));
if (kParallel) {
los_bitmap->AtomicTestAndSet(obj);
} else {
los_bitmap->Set(obj);
}
// We only have strings and primitive arrays in the large object space. So
// there is no reason to push onto the mark-stack.
DCHECK(obj->IsString() || (obj->IsArrayInstance() && !obj->IsObjectArray()));
return false;
}
}
inline void MarkCompact::MarkObject(mirror::Object* obj,
mirror::Object* holder,
MemberOffset offset) {
if (obj != nullptr) {
MarkObjectNonNull(obj, holder, offset);
}
}
mirror::Object* MarkCompact::MarkObject(mirror::Object* obj) {
MarkObject(obj, nullptr, MemberOffset(0));
return obj;
}
void MarkCompact::MarkHeapReference(mirror::HeapReference<mirror::Object>* obj,
[[maybe_unused]] bool do_atomic_update) {
MarkObject(obj->AsMirrorPtr(), nullptr, MemberOffset(0));
}
void MarkCompact::VisitRoots(mirror::Object*** roots,
size_t count,
const RootInfo& info) {
if (compacting_) {
uint8_t* moving_space_begin = black_dense_end_;
uint8_t* moving_space_end = moving_space_end_;
for (size_t i = 0; i < count; ++i) {
UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
}
} else {
const Verification* verification = GetHeap()->GetVerification();
for (size_t i = 0; i < count; ++i) {
mirror::Object* obj = *roots[i];
if (kVerifyGcRootDuringMarking) {
CHECK(verification->IsValidObject(obj)) << obj << " info:" << info;
}
MarkObjectNonNull(obj);
}
}
}
void MarkCompact::VisitRoots(mirror::CompressedReference<mirror::Object>** roots,
size_t count,
const RootInfo& info) {
// TODO: do we need to check if the root is null or not?
if (compacting_) {
uint8_t* moving_space_begin = black_dense_end_;
uint8_t* moving_space_end = moving_space_end_;
for (size_t i = 0; i < count; ++i) {
UpdateRoot(roots[i], moving_space_begin, moving_space_end, info);
}
} else {
const Verification* verification = GetHeap()->GetVerification();
for (size_t i = 0; i < count; ++i) {
mirror::Object* obj = roots[i]->AsMirrorPtr();
if (kVerifyGcRootDuringMarking) {
CHECK(verification->IsValidObject(obj)) << obj << " info:" << info;
}
MarkObjectNonNull(obj);
}
}
}
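// Returns the marked address of 'obj' (its post-compact address when called
// during compaction), or null if the object is not marked. Objects allocated
// after the marking pause ('black' allocations) are treated as marked.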
mirror::Object* MarkCompact::IsMarked(mirror::Object* obj) {
if (HasAddress(obj)) {
const bool is_black = reinterpret_cast<uint8_t*>(obj) >= black_allocations_begin_;
if (compacting_) {
if (is_black) {
return PostCompactBlackObjAddr(obj);
} else if (moving_space_bitmap_->Test(obj)) {
if (reinterpret_cast<uint8_t*>(obj) < black_dense_end_) {
return obj;
} else {
return PostCompactOldObjAddr(obj);
}
} else {
return nullptr;
}
}
return (is_black || moving_space_bitmap_->Test(obj)) ? obj : nullptr;
} else if (non_moving_space_bitmap_->HasAddress(obj)) {
if (non_moving_space_bitmap_->Test(obj)) {
return obj;
}
} else if (immune_spaces_.ContainsObject(obj)) {
return obj;
} else {
DCHECK(heap_->GetLargeObjectsSpace())
<< "ref=" << obj
<< " doesn't belong to any of the spaces and large object space doesn't exist";
accounting::LargeObjectBitmap* los_bitmap = heap_->GetLargeObjectsSpace()->GetMarkBitmap();
if (los_bitmap->HasAddress(obj)) {
DCHECK(IsAlignedParam(obj, space::LargeObjectSpace::ObjectAlignment()));
if (los_bitmap->Test(obj)) {
return obj;
}
} else {
// The given obj is not in any of the known spaces, so return null. This could
// happen, for instance, in interpreter caches, wherein a concurrent update
// to the cache could result in obj being a non-reference. This is
// tolerable because SweepInterpreterCaches only updates if the given
// object has moved, which can't be the case for a non-reference.
return nullptr;
}
}
return marking_done_ && IsOnAllocStack(obj) ? obj : nullptr;
}
bool MarkCompact::IsNullOrMarkedHeapReference(mirror::HeapReference<mirror::Object>* obj,
[[maybe_unused]] bool do_atomic_update) {
mirror::Object* ref = obj->AsMirrorPtr();
if (ref == nullptr) {
return true;
}
return IsMarked(ref);
}
// Process the 'referent' field in a java.lang.ref.Reference. If the referent
// has not yet been marked, put it on the appropriate list in the heap for later
// processing.
void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
ObjPtr<mirror::Reference> ref) {
heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
}
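// Debug verification (behind kVerifyNoMissingCardMarks) that every object fully
// contained in old-gen which references the mid-gen/young-gen range has its card
// dirtied in the card-table.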
void MarkCompact::VerifyNoMissingGenerationalCardMarks() {
if (kVerifyNoMissingCardMarks) {
accounting::CardTable* card_table = heap_->GetCardTable();
auto obj_visitor = [&](mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
bool found = false;
VisitReferencesVisitor visitor(
[begin = old_gen_end_, end = moving_space_end_, &found](mirror::Object* ref) {
found |= ref >= reinterpret_cast<mirror::Object*>(begin) &&
ref < reinterpret_cast<mirror::Object*>(end);
});
obj->VisitReferences</*kVisitNativeRoots=*/true>(visitor, visitor);
if (found) {
size_t obj_size = RoundUp(obj->SizeOf<kDefaultVerifyFlags>(), kAlignment);
if (!card_table->IsDirty(obj) &&
reinterpret_cast<uint8_t*>(obj) + obj_size <= old_gen_end_) {
std::ostringstream oss;
obj->DumpReferences</*kDumpNativeRoots=*/true>(oss, /*dump_type_of=*/true);
LOG(FATAL_WITHOUT_ABORT)
<< "Object " << obj << " (" << obj->PrettyTypeOf()
<< ") has references to mid-gen/young-gen:"
<< "\n obj-size = " << obj_size
<< "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
<< "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_) << "\n references =\n"
<< oss.str();
heap_->GetVerification()->LogHeapCorruption(
/*holder=*/nullptr, MemberOffset(0), obj, /*fatal=*/true);
}
}
};
moving_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(moving_space_begin_),
reinterpret_cast<uintptr_t>(old_gen_end_),
obj_visitor);
}
}
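// Debug verification (behind kVerifyPostGCObjects) that surviving objects hold
// only valid references after the GC. Walks the non-moving and old-gen bitmaps,
// and, when compaction was performed, also walks the tightly packed compacted
// region object-by-object up to mid-gen-end.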
void MarkCompact::VerifyPostGCObjects(bool performed_compaction, uint8_t* mark_bitmap_clear_end) {
if (kVerifyPostGCObjects) {
mirror::Object* last_visited_obj = nullptr;
auto obj_visitor =
[&](mirror::Object* obj, bool verify_bitmap = false) REQUIRES_SHARED(Locks::mutator_lock_) {
std::vector<mirror::Object*> invalid_refs;
if (verify_bitmap && !moving_space_bitmap_->Test(obj)) {
LOG(FATAL) << "Obj " << obj << " (" << obj->PrettyTypeOf()
<< ") doesn't have mark-bit set"
<< "\n prev-black-dense-end = " << static_cast<void*>(prev_black_dense_end_)
<< "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
<< "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_);
}
VisitReferencesVisitor visitor(
[verification = heap_->GetVerification(), &invalid_refs](mirror::Object* ref)
REQUIRES_SHARED(Locks::mutator_lock_) {
if (ref != nullptr && !verification->IsValidObject(ref)) {
invalid_refs.push_back(ref);
}
});
obj->VisitReferences</*kVisitNativeRoots=*/true>(visitor, visitor);
if (!invalid_refs.empty()) {
std::ostringstream oss;
for (mirror::Object* ref : invalid_refs) {
oss << ref << " ";
}
LOG(FATAL_WITHOUT_ABORT)
<< "Object " << obj << " (" << obj->PrettyTypeOf() << ") has invalid references:\n"
<< oss.str() << "\ncard = " << static_cast<int>(heap_->GetCardTable()->GetCard(obj))
<< "\n prev-black-dense-end = " << static_cast<void*>(prev_black_dense_end_)
<< "\n old-gen-end = " << static_cast<void*>(old_gen_end_)
<< "\n mid-gen-end = " << static_cast<void*>(mid_gen_end_)
<< "\n black-allocations-begin = " << static_cast<void*>(black_allocations_begin_);
// Calling PrettyTypeOf() on a stale reference mostly results in segfault.
oss.str("");
obj->DumpReferences</*kDumpNativeRoots=*/true>(oss, /*dump_type_of=*/false);
LOG(FATAL_WITHOUT_ABORT) << "\n references =\n" << oss.str();
heap_->GetVerification()->LogHeapCorruption(
/*holder=*/nullptr, MemberOffset(0), obj, /*fatal=*/true);
}
last_visited_obj = obj;
};
non_moving_space_bitmap_->VisitAllMarked(obj_visitor);
last_visited_obj = nullptr;
// We should verify all objects that have survived, which means old and mid-gen.
// Objects that were promoted to old-gen and mid-gen in this GC cycle are tightly
// packed, except if compaction was not performed. So we use object size to walk
// the heap, and also verify that the mark-bit is set in the tightly packed portion.
moving_space_bitmap_->VisitMarkedRange(
reinterpret_cast<uintptr_t>(moving_space_begin_),
reinterpret_cast<uintptr_t>(performed_compaction ? prev_black_dense_end_
: mark_bitmap_clear_end),
obj_visitor);
if (performed_compaction) {
mirror::Object* obj = last_visited_obj;
if (obj == nullptr || AlignUp(reinterpret_cast<uint8_t*>(obj) + obj->SizeOf(), kAlignment) <
prev_black_dense_end_) {
obj = reinterpret_cast<mirror::Object*>(prev_black_dense_end_);
}
while (reinterpret_cast<uint8_t*>(obj) < mid_gen_end_ && obj->GetClass() != nullptr) {
// Objects in mid-gen will not have their corresponding mark-bits set.
obj_visitor(obj, reinterpret_cast<void*>(obj) < black_dense_end_);
uintptr_t next = reinterpret_cast<uintptr_t>(obj) + obj->SizeOf();
obj = reinterpret_cast<mirror::Object*>(RoundUp(next, kAlignment));
}
}
}
}
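// Wraps up the GC cycle: records logging state for the next cycle, updates the
// class-after-object map and deferred dirty cards (in generational mode),
// promotes mid-gen to old-gen and young-gen to mid-gen, clears/retains the
// moving-space mark-bitmap as needed, and releases GC-internal resources.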
void MarkCompact::FinishPhase(bool performed_compaction) {
TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
GetCurrentIteration()->SetScannedBytes(bytes_scanned_);
bool is_zygote = Runtime::Current()->IsZygote();
compacting_ = false;
marking_done_ = false;
uint8_t* mark_bitmap_clear_end = black_dense_end_;
LOG(DEBUG) << "ART-GC black_dense_end:" << static_cast<void*>(black_dense_end_)
<< " mid_gen_end:" << static_cast<void*>(mid_gen_end_)
<< " post_compact_end:" << static_cast<void*>(post_compact_end_)
<< " black_allocations_begin:" << static_cast<void*>(black_allocations_begin_)
<< " young:" << young_gen_ << " performed_compaction:" << performed_compaction;
// Retain values of some fields for logging in the next GC cycle, in case
// memory corruption is detected.
prev_black_allocations_begin_ = static_cast<void*>(black_allocations_begin_);
prev_black_dense_end_ = static_cast<void*>(black_dense_end_);
prev_post_compact_end_ = static_cast<void*>(post_compact_end_);
prev_gc_young_ = young_gen_;
prev_gc_performed_compaction_ = performed_compaction;
// Whether or not compaction is performed, we always set post_compact_end_
// before reaching here.
CHECK_NE(post_compact_end_, nullptr);
if (use_generational_) {
{
ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
// We need to retain and update the class-after-object map for old-gen as
// it won't be re-created in the next young GC.
// Jump to the first class which is getting promoted to old-gen. Since
// old-gen is not compacted, references into it don't need to be updated.
// All pairs in mid-gen will be updated with post-compact addresses and
// retained, as mid-gen is getting consumed into old-gen now. All pairs
// after mid-gen will be erased as they are not required in the next GC cycle.
auto iter = class_after_obj_map_.lower_bound(
ObjReference::FromMirrorPtr(reinterpret_cast<mirror::Object*>(old_gen_end_)));
while (iter != class_after_obj_map_.end()) {
mirror::Object* klass = iter->first.AsMirrorPtr();
mirror::Object* obj = iter->second.AsMirrorPtr();
DCHECK_GT(klass, obj);
// Black allocations begin after the marking pause. Therefore, we cannot
// have a situation wherein a class is allocated after the pause while its
// object is allocated before it.
if (reinterpret_cast<uint8_t*>(klass) >= black_allocations_begin_) {
for (auto it = iter; it != class_after_obj_map_.end(); it++) {
DCHECK_GE(reinterpret_cast<uint8_t*>(it->second.AsMirrorPtr()),
black_allocations_begin_);
}
class_after_obj_map_.erase(iter, class_after_obj_map_.end());
break;
}
DCHECK(moving_space_bitmap_->Test(klass));
DCHECK(moving_space_bitmap_->Test(obj));
// As 'mid_gen_end_' is where our old-gen will end now, compute compacted
// addresses of <class, object> for comparisons and updating in the map.
mirror::Object* compacted_klass = klass;
mirror::Object* compacted_obj = obj;
if (performed_compaction) {
compacted_klass = PostCompactAddress(klass, old_gen_end_, moving_space_end_);
compacted_obj = PostCompactAddress(obj, old_gen_end_, moving_space_end_);
DCHECK_GT(compacted_klass, compacted_obj);
}
if (reinterpret_cast<uint8_t*>(compacted_obj) >= mid_gen_end_) {
iter = class_after_obj_map_.erase(iter);
continue;
} else if (mid_to_old_promo_bit_vec_.get() != nullptr) {
if (reinterpret_cast<uint8_t*>(compacted_klass) >= old_gen_end_) {
DCHECK(mid_to_old_promo_bit_vec_->IsBitSet(
(reinterpret_cast<uint8_t*>(compacted_obj) - old_gen_end_) / kAlignment));
}
if (reinterpret_cast<uint8_t*>(compacted_klass) < mid_gen_end_) {
DCHECK(mid_to_old_promo_bit_vec_->IsBitSet(
(reinterpret_cast<uint8_t*>(compacted_klass) - old_gen_end_) / kAlignment));
}
}
if (performed_compaction) {
auto nh = class_after_obj_map_.extract(iter++);
nh.key() = ObjReference::FromMirrorPtr(compacted_klass);
nh.mapped() = ObjReference::FromMirrorPtr(compacted_obj);
auto success = class_after_obj_map_.insert(iter, std::move(nh));
CHECK_EQ(success->first.AsMirrorPtr(), compacted_klass);
} else {
iter++;
}
}
// Dirty the cards for objects captured from native-roots during marking-phase.
accounting::CardTable* card_table = heap_->GetCardTable();
for (auto obj : dirty_cards_later_vec_) {
// Only moving and non-moving spaces are relevant as the remaining
// spaces are all immune-spaces, which use the card-table anyway.
if (HasAddress(obj)) {
// Objects in young-gen that refer to other young-gen objects don't
// need to be tracked.
// The vector contains pre-compact object references, whereas
// 'mid_gen_end_' is a post-compact boundary. So compare against the
// post-compact object reference.
mirror::Object* compacted_obj =
performed_compaction ? PostCompactAddress(obj, black_dense_end_, moving_space_end_)
: obj;
if (reinterpret_cast<uint8_t*>(compacted_obj) < mid_gen_end_) {
card_table->MarkCard(compacted_obj);
}
} else if (non_moving_space_->HasAddress(obj)) {
card_table->MarkCard(obj);
}
}
}
dirty_cards_later_vec_.clear();
// Copy mid-gen bitmap into moving-space's mark-bitmap
if (mid_to_old_promo_bit_vec_.get() != nullptr) {
DCHECK_EQ(mid_to_old_promo_bit_vec_->GetBitSizeOf(),
(mid_gen_end_ - old_gen_end_) / kObjectAlignment);
uint32_t* bitmap_begin = reinterpret_cast<uint32_t*>(moving_space_bitmap_->Begin());
DCHECK(IsAligned<kObjectAlignment * BitVector::kWordBits>(gPageSize));
size_t index = (old_gen_end_ - moving_space_begin_) / kObjectAlignment / BitVector::kWordBits;
mid_to_old_promo_bit_vec_->CopyTo(&bitmap_begin[index],
mid_to_old_promo_bit_vec_->GetSizeOf());
mid_to_old_promo_bit_vec_.reset(nullptr);
} else if (!performed_compaction) {
// We typically only retain the mark-bitmap for the old generation as the
// objects following it are expected to be contiguous. However, when
// compaction is not performed, we may have decided to tolerate a few holes
// here and there. So we have to retain the bitmap for the entire
// 'compacted' portion of the heap, which is up to mid-gen-end.
DCHECK_LE(old_gen_end_, post_compact_end_);
mark_bitmap_clear_end = post_compact_end_;
}
// Promote all mid-gen objects to old-gen and young-gen objects to mid-gen
// for next GC cycle.
old_gen_end_ = mid_gen_end_;
mid_gen_end_ = post_compact_end_;
post_compact_end_ = nullptr;
// Verify (in debug builds), after updating the mark-bitmap, that the
// class-after-object map is correct.
for (auto iter : class_after_obj_map_) {
DCHECK(moving_space_bitmap_->Test(iter.second.AsMirrorPtr()));
mirror::Object* klass = iter.first.AsMirrorPtr();
DCHECK_IMPLIES(!moving_space_bitmap_->Test(klass),
reinterpret_cast<uint8_t*>(klass) >= old_gen_end_);
}
} else {
class_after_obj_map_.clear();
if (!performed_compaction) {
DCHECK_LE(old_gen_end_, post_compact_end_);
mark_bitmap_clear_end = post_compact_end_;
}
}
// The black-dense region, which requires the bitmap for object-walks, could be
// larger than old-gen. Therefore, retain the bitmap for the entire black-dense
// region until the next GC. At the beginning of the next cycle, we clear
// [old_gen_end_, moving_space_end_).
mark_bitmap_clear_end = std::max(black_dense_end_, mark_bitmap_clear_end);
DCHECK_ALIGNED_PARAM(mark_bitmap_clear_end, gPageSize);
if (moving_space_begin_ == mark_bitmap_clear_end) {
moving_space_bitmap_->Clear();
} else {
DCHECK_LT(moving_space_begin_, mark_bitmap_clear_end);
DCHECK_LE(mark_bitmap_clear_end, moving_space_end_);
moving_space_bitmap_->ClearRange(reinterpret_cast<mirror::Object*>(mark_bitmap_clear_end),
reinterpret_cast<mirror::Object*>(moving_space_end_));
}
bump_pointer_space_->SetBlackDenseRegionSize(mark_bitmap_clear_end - moving_space_begin_);
if (UNLIKELY(is_zygote && IsValidFd(uffd_))) {
// This unregisters all ranges as a side-effect.
close(uffd_);
uffd_ = kFdUnused;
uffd_initialized_ = false;
}
CHECK(mark_stack_->IsEmpty()); // Ensure that the mark stack is empty.
mark_stack_->Reset();
ZeroAndReleaseMemory(compaction_buffers_map_.Begin(), compaction_buffers_map_.Size());
info_map_.MadviseDontNeedAndZero();
live_words_bitmap_->ClearBitmap();
DCHECK_EQ(thread_running_gc_, Thread::Current());
if (kIsDebugBuild) {
MutexLock mu(thread_running_gc_, lock_);
if (updated_roots_.get() != nullptr) {
updated_roots_->clear();
}
}
linear_alloc_arenas_.clear();
{
ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
heap_->ClearMarkedObjects();
if (use_generational_) {
if (performed_compaction) {
// Clear the bits set temporarily for black allocations in non-moving
// space in UpdateNonMovingSpaceBlackAllocations(), which is called when
// we perform compaction, so that objects are considered for GC in next cycle.
accounting::ObjectStack* stack = heap_->GetAllocationStack();
const StackReference<mirror::Object>* limit = stack->End();
for (StackReference<mirror::Object>* it = stack->Begin(); it != limit; ++it) {
mirror::Object* obj = it->AsMirrorPtr();
if (obj != nullptr && non_moving_space_bitmap_->HasAddress(obj)) {
non_moving_space_bitmap_->Clear(obj);
}
}
} else {
// Since we didn't perform compaction, we need to identify old objects
// referring to the mid-gen.
auto obj_visitor = [this, card_table = heap_->GetCardTable()](mirror::Object* obj) {
bool found = false;
VisitReferencesVisitor visitor(
[begin = old_gen_end_, end = mid_gen_end_, &found](mirror::Object* ref) {
found |= ref >= reinterpret_cast<mirror::Object*>(begin) &&
ref < reinterpret_cast<mirror::Object*>(end);
});
uint8_t* card = card_table->CardFromAddr(obj);
if (*card == accounting::CardTable::kCardDirty) {
return;
}
// Native-roots are captured during marking and the corresponding cards are already
// dirtied above.
obj->VisitReferences</*kVisitNativeRoots=*/false>(visitor, visitor);
if (found) {
*card = accounting::CardTable::kCardDirty;
}
};
moving_space_bitmap_->VisitMarkedRange(reinterpret_cast<uintptr_t>(moving_space_begin_),
reinterpret_cast<uintptr_t>(old_gen_end_),
obj_visitor);
non_moving_space_bitmap_->VisitAllMarked(obj_visitor);
}
}
}
GcVisitedArenaPool* arena_pool =
static_cast<GcVisitedArenaPool*>(Runtime::Current()->GetLinearAllocArenaPool());
arena_pool->DeleteUnusedArenas();
if (kVerifyNoMissingCardMarks && use_generational_) {
// This must be done in a pause as otherwise verification racing with a
// mutator between its mutation and card-dirtying will spuriously fail.
ScopedPause pause(this);
WriterMutexLock mu(thread_running_gc_, *Locks::heap_bitmap_lock_);
VerifyNoMissingGenerationalCardMarks();
}
if (kVerifyPostGCObjects && use_generational_) {
ReaderMutexLock mu(thread_running_gc_, *Locks::mutator_lock_);
WriterMutexLock mu2(thread_running_gc_, *Locks::heap_bitmap_lock_);
VerifyPostGCObjects(performed_compaction, mark_bitmap_clear_end);
}
}
} // namespace collector
} // namespace gc
} // namespace art