Add a workaround for null klass during marking phase
A race condition sometimes causes a null class pointer to be read from
an object during the marking phase in AddLiveBytesAndScanRef().
This CL adds a workaround until the root cause is identified: wait
briefly (1ms at a time, for at most 1 second in total) and re-read the
class pointer, which has been observed to then yield a valid class.
Test: art/test/testrunner/testrunner.py
Bug: 173676071
Bug: 173771057
Change-Id: I96004341c54593e9e6c5b4fa4a2ec6713acd0af7
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 3b1ad56..be27001 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -1055,20 +1055,29 @@
obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(offset);
// TODO(lokeshgidra): Remove the following condition once b/173676071 is fixed.
if (UNLIKELY(ref == nullptr && offset == mirror::Object::ClassOffset())) {
- // As of this change, we don't know for sure what is causing the crash in
- // the above bug. The only possibility seems to be some race condition.
- // Therefore we add a small delay and then read class ref
- // again to confirm if that is the case.
- sleep(1);
- // It must be heap corruption. Remove memory protection and dump data.
- collector_->region_space_->Unprotect();
- mirror::Class* klass = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
- LOG(FATAL_WITHOUT_ABORT) << "klass pointer for ref: " << obj
- << " found to be null. klass read again and found: " << klass;
- collector_->heap_->GetVerification()->LogHeapCorruption(obj,
- offset,
- klass,
- /* fatal */ true);
+ // It has been verified as a race condition (see b/173676071)! After a small
+ // wait when we reload the class pointer, it turns out to be a valid class
+ // object. So as a workaround, we can continue execution and log an error
+ // that this happened.
+ for (size_t i = 0; i < 1000; i++) {
+ // Wait for 1ms at a time. Don't wait for more than 1 second in total.
+ usleep(1000);
+ ref = obj->GetClass<kVerifyNone, kWithoutReadBarrier>();
+ if (ref != nullptr) {
+ LOG(ERROR) << "klass pointer for obj: "
+ << obj << " (" << mirror::Object::PrettyTypeOf(obj)
+ << ") found to be null first. Reloading after a small wait fetched klass: "
+ << ref << " (" << mirror::Object::PrettyTypeOf(ref) << ")";
+ break;
+ }
+ }
+
+ if (UNLIKELY(ref == nullptr)) {
+ // It must be heap corruption. Remove memory protection and dump data.
+ collector_->region_space_->Unprotect();
+ LOG(FATAL_WITHOUT_ABORT) << "klass pointer for ref: " << obj << " found to be null.";
+ collector_->heap_->GetVerification()->LogHeapCorruption(obj, offset, ref, /* fatal */ true);
+ }
}
CheckReference(ref);
}