Implement a simple count based boot marker.

We write the number of failed boots to the marker and only
prune the dalvik cache if the number of consecutive failed
boots is higher than a predefined threshold.

Note that the code is forgiving of errors related to boot
markers; we continue the boot process even if we're unable
to create / write or flush the boot marker.

bug: 19360096
Change-Id: Ia17c3b783318ddf43c9199d0f7f09c54a4176667
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 261d3c2..d873e6d 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -19,6 +19,7 @@
 #include <dirent.h>
 #include <sys/statvfs.h>
 #include <sys/types.h>
+#include <unistd.h>
 
 #include <random>
 
@@ -122,21 +123,50 @@
 // every zygote boot and delete it when the boot completes. If we find a file already
 // present, it usually means the boot didn't complete. We wipe the entire dalvik
 // cache if that's the case.
-static void MarkZygoteStart(const InstructionSet isa) {
+static void MarkZygoteStart(const InstructionSet isa, const uint32_t max_failed_boots) {
   const std::string isa_subdir = GetDalvikCacheOrDie(GetInstructionSetString(isa), false);
   const std::string boot_marker = isa_subdir + "/.booting";
+  const char* file_name = boot_marker.c_str();
 
-  if (OS::FileExists(boot_marker.c_str())) {
+  uint32_t num_failed_boots = 0;
+  std::unique_ptr<File> file(OS::OpenFileReadWrite(file_name));
+  if (file.get() == nullptr) {
+    file.reset(OS::CreateEmptyFile(file_name));
+
+    if (file.get() == nullptr) {
+      PLOG(WARNING) << "Failed to create boot marker.";
+      return;
+    }
+  } else {
+    if (!file->ReadFully(&num_failed_boots, sizeof(num_failed_boots))) {
+      PLOG(WARNING) << "Failed to read boot marker.";
+      file->Erase();
+      return;
+    }
+  }
+
+  if (max_failed_boots != 0 && num_failed_boots > max_failed_boots) {
     LOG(WARNING) << "Incomplete boot detected. Pruning dalvik cache";
     RealPruneDalvikCache(isa_subdir);
   }
 
-  VLOG(startup) << "Creating boot start marker: " << boot_marker;
-  std::unique_ptr<File> f(OS::CreateEmptyFile(boot_marker.c_str()));
-  if (f.get() != nullptr) {
-    if (f->FlushCloseOrErase() != 0) {
-      PLOG(WARNING) << "Failed to write boot marker.";
-    }
+  ++num_failed_boots;
+  VLOG(startup) << "Number of failed boots on : " << boot_marker << " = " << num_failed_boots;
+
+  if (lseek(file->Fd(), 0, SEEK_SET) == -1) {
+    PLOG(WARNING) << "Failed to write boot marker.";
+    file->Erase();
+    return;
+  }
+
+  if (!file->WriteFully(&num_failed_boots, sizeof(num_failed_boots))) {
+    PLOG(WARNING) << "Failed to write boot marker.";
+    file->Erase();
+    return;
+  }
+
+  if (file->FlushCloseOrErase() != 0) {
+    PLOG(WARNING) << "Failed to flush boot marker.";
   }
 }
 
@@ -450,7 +480,7 @@
                                              &has_cache, &is_global_cache);
 
   if (Runtime::Current()->IsZygote()) {
-    MarkZygoteStart(image_isa);
+    MarkZygoteStart(image_isa, Runtime::Current()->GetZygoteMaxFailedBoots());
   }
 
   ImageSpace* space;
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index daa4373..99369ca 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -238,6 +238,9 @@
       .Define("-XX:NativeBridge=_")
           .WithType<std::string>()
           .IntoKey(M::NativeBridge)
+      .Define("-Xzygote-max-failed-boots=_")
+          .WithType<unsigned int>()
+          .IntoKey(M::ZygoteMaxFailedBoots)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 0ccc97c..f38f65e 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -173,7 +173,8 @@
       implicit_null_checks_(false),
       implicit_so_checks_(false),
       implicit_suspend_checks_(false),
-      is_native_bridge_loaded_(false) {
+      is_native_bridge_loaded_(false),
+      zygote_max_failed_boots_(0) {
   CheckAsmSupportOffsetsAndSizes();
 }
 
@@ -766,6 +767,8 @@
     GetInstrumentation()->ForceInterpretOnly();
   }
 
+  zygote_max_failed_boots_ = runtime_options.GetOrDefault(Opt::ZygoteMaxFailedBoots);
+
   XGcOption xgc_option = runtime_options.GetOrDefault(Opt::GcOption);
   heap_ = new gc::Heap(runtime_options.GetOrDefault(Opt::MemoryInitialSize),
                        runtime_options.GetOrDefault(Opt::HeapGrowthLimit),
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 118c838..e7d95fb 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -516,6 +516,10 @@
     return target_sdk_version_;
   }
 
+  uint32_t GetZygoteMaxFailedBoots() const {
+    return zygote_max_failed_boots_;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -682,6 +686,11 @@
   // that there's no native bridge.
   bool is_native_bridge_loaded_;
 
+  // The maximum number of failed boots we allow before pruning the dalvik cache
+  // and trying again. This option is only inspected when we're running as a
+  // zygote.
+  uint32_t zygote_max_failed_boots_;
+
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
 std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 1bdc980..71a0152 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -114,6 +114,7 @@
                                                                           // We don't call abort(3) by default; see
                                                                           // Runtime::Abort.
 RUNTIME_OPTIONS_KEY (void (*)(),          HookAbort,                      nullptr)
+RUNTIME_OPTIONS_KEY (unsigned int,        ZygoteMaxFailedBoots,           1)
 
 
 #undef RUNTIME_OPTIONS_KEY