Merge from Chromium at DEPS revision r207203

This commit was generated by merge_to_master.py.

Change-Id: I4c3250c02af5ef744b940a98c20e2ec93f9c9f4d
diff --git a/AUTHORS b/AUTHORS
index 27a9407..fc40194 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -6,3 +6,6 @@
 # Initial version authors:
 Jeffrey Dean <jeff@google.com>
 Sanjay Ghemawat <sanjay@google.com>
+
+# Partial list of contributors:
+Kevin Regan <kevin.d.regan@gmail.com>
diff --git a/Makefile b/Makefile
index ab5ed83..96af776 100644
--- a/Makefile
+++ b/Makefile
@@ -42,6 +42,7 @@
 	env_test \
 	filename_test \
 	filter_block_test \
+	issue178_test \
 	log_test \
 	memenv_test \
 	skiplist_test \
@@ -69,7 +70,7 @@
 else
 # Update db.h if you change these.
 SHARED_MAJOR = 1
-SHARED_MINOR = 10
+SHARED_MINOR = 12
 SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT)
 SHARED2 = $(SHARED1).$(SHARED_MAJOR)
 SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR)
@@ -146,6 +147,9 @@
 filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
 
+issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS)
+	$(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
+
 log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
 	$(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
 
diff --git a/db/db_impl.cc b/db/db_impl.cc
index 5c30d6f..395d317 100644
--- a/db/db_impl.cc
+++ b/db/db_impl.cc
@@ -35,6 +35,8 @@
 
 namespace leveldb {
 
+const int kNumNonTableCacheFiles = 10;
+
 // Information kept for every waiting writer
 struct DBImpl::Writer {
   Status status;
@@ -92,9 +94,9 @@
   Options result = src;
   result.comparator = icmp;
   result.filter_policy = (src.filter_policy != NULL) ? ipolicy : NULL;
-  ClipToRange(&result.max_open_files,            20,     50000);
-  ClipToRange(&result.write_buffer_size,         64<<10, 1<<30);
-  ClipToRange(&result.block_size,                1<<10,  4<<20);
+  ClipToRange(&result.max_open_files,    64 + kNumNonTableCacheFiles, 50000);
+  ClipToRange(&result.write_buffer_size, 64<<10,                      1<<30);
+  ClipToRange(&result.block_size,        1<<10,                       4<<20);
   if (result.info_log == NULL) {
     // Open a log file in the same directory as the db
     src.env->CreateDir(dbname);  // In case it does not exist
@@ -130,12 +132,13 @@
       log_(NULL),
       tmp_batch_(new WriteBatch),
       bg_compaction_scheduled_(false),
-      manual_compaction_(NULL) {
+      manual_compaction_(NULL),
+      consecutive_compaction_errors_(0) {
   mem_->Ref();
   has_imm_.Release_Store(NULL);
 
   // Reserve ten files or so for other uses and give the rest to TableCache.
-  const int table_cache_size = options.max_open_files - 10;
+  const int table_cache_size = options.max_open_files - kNumNonTableCacheFiles;
   table_cache_ = new TableCache(dbname_, &options_, table_cache_size);
 
   versions_ = new VersionSet(dbname_, &options_, table_cache_,
@@ -619,6 +622,7 @@
     Status s = BackgroundCompaction();
     if (s.ok()) {
       // Success
+      consecutive_compaction_errors_ = 0;
     } else if (shutting_down_.Acquire_Load()) {
       // Error most likely due to shutdown; do not wait
     } else {
@@ -630,7 +634,12 @@
       Log(options_.info_log, "Waiting after background compaction error: %s",
           s.ToString().c_str());
       mutex_.Unlock();
-      env_->SleepForMicroseconds(1000000);
+      ++consecutive_compaction_errors_;
+      int seconds_to_sleep = 1;
+      for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) {
+        seconds_to_sleep *= 2;
+      }
+      env_->SleepForMicroseconds(seconds_to_sleep * 1000000);
       mutex_.Lock();
     }
   }
diff --git a/db/db_impl.h b/db/db_impl.h
index bd29dd8..3c8d711 100644
--- a/db/db_impl.h
+++ b/db/db_impl.h
@@ -163,6 +163,7 @@
 
   // Have we encountered a background error in paranoid mode?
   Status bg_error_;
+  int consecutive_compaction_errors_;
 
   // Per level compaction stats.  stats_[level] stores the stats for
   // compactions that produced data for the specified "level".
diff --git a/db/db_test.cc b/db/db_test.cc
index 2f51296..49aae04 100644
--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -33,8 +33,11 @@
  public:
   AtomicCounter() : count_(0) { }
   void Increment() {
+    IncrementBy(1);
+  }
+  void IncrementBy(int count) {
     MutexLock l(&mu_);
-    count_++;
+    count_ += count;
   }
   int Read() {
     MutexLock l(&mu_);
@@ -45,6 +48,10 @@
     count_ = 0;
   }
 };
+
+void DelayMilliseconds(int millis) {
+  Env::Default()->SleepForMicroseconds(millis * 1000);
+}
 }
 
 // Special Env used to delay background operations
@@ -69,6 +76,7 @@
   AtomicCounter random_read_counter_;
 
   AtomicCounter sleep_counter_;
+  AtomicCounter sleep_time_counter_;
 
   explicit SpecialEnv(Env* base) : EnvWrapper(base) {
     delay_sstable_sync_.Release_Store(NULL);
@@ -103,7 +111,7 @@
       Status Flush() { return base_->Flush(); }
       Status Sync() {
         while (env_->delay_sstable_sync_.Acquire_Load() != NULL) {
-          env_->SleepForMicroseconds(100000);
+          DelayMilliseconds(100);
         }
         return base_->Sync();
       }
@@ -174,8 +182,9 @@
 
   virtual void SleepForMicroseconds(int micros) {
     sleep_counter_.Increment();
-    target()->SleepForMicroseconds(micros);
+    sleep_time_counter_.IncrementBy(micros);
   }
+
 };
 
 class DBTest {
@@ -625,7 +634,7 @@
     }
 
     // Step 4: Wait for compaction to finish
-    env_->SleepForMicroseconds(1000000);
+    DelayMilliseconds(1000);
 
     ASSERT_EQ(NumTableFilesAtLevel(0), 0);
   } while (ChangeOptions());
@@ -1309,7 +1318,7 @@
   Reopen();
   Reopen();
   ASSERT_EQ("(a->v)", Contents());
-  env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
+  DelayMilliseconds(1000);  // Wait for compaction to finish
   ASSERT_EQ("(a->v)", Contents());
 }
 
@@ -1325,7 +1334,7 @@
   Put("","");
   Reopen();
   Put("","");
-  env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
+  DelayMilliseconds(1000);  // Wait for compaction to finish
   Reopen();
   Put("d","dv");
   Reopen();
@@ -1335,7 +1344,7 @@
   Delete("b");
   Reopen();
   ASSERT_EQ("(->)(c->cv)", Contents());
-  env_->SleepForMicroseconds(1000000);  // Wait for compaction to finish
+  DelayMilliseconds(1000);  // Wait for compaction to finish
   ASSERT_EQ("(->)(c->cv)", Contents());
 }
 
@@ -1520,6 +1529,30 @@
   ASSERT_GE(env_->sleep_counter_.Read(), 5);
 }
 
+TEST(DBTest, ExponentialBackoff) {
+  Options options = CurrentOptions();
+  options.env = env_;
+  Reopen(&options);
+
+  ASSERT_OK(Put("foo", "v1"));
+  ASSERT_EQ("v1", Get("foo"));
+  Compact("a", "z");
+  env_->non_writable_.Release_Store(env_);  // Force errors for new files
+  env_->sleep_counter_.Reset();
+  env_->sleep_time_counter_.Reset();
+  for (int i = 0; i < 5; i++) {
+    dbfull()->TEST_CompactRange(2, NULL, NULL);
+  }
+  env_->non_writable_.Release_Store(NULL);
+
+  // Wait for compaction to finish
+  DelayMilliseconds(1000);
+
+  ASSERT_GE(env_->sleep_counter_.Read(), 5);
+  ASSERT_LT(env_->sleep_counter_.Read(), 10);
+  ASSERT_GE(env_->sleep_time_counter_.Read(), 10e6);
+}
+
 TEST(DBTest, NonWritableFileSystem) {
   Options options = CurrentOptions();
   options.write_buffer_size = 1000;
@@ -1533,7 +1566,7 @@
     fprintf(stderr, "iter %d; errors %d\n", i, errors);
     if (!Put("foo", big).ok()) {
       errors++;
-      env_->SleepForMicroseconds(100000);
+      DelayMilliseconds(100);
     }
   }
   ASSERT_GT(errors, 0);
@@ -1589,6 +1622,7 @@
   dbfull()->TEST_CompactMemTable();
   ASSERT_EQ("bar", Get("foo"));
 
+  Close();
   ASSERT_TRUE(DeleteAnSSTFile());
   Options options = CurrentOptions();
   options.paranoid_checks = true;
@@ -1742,13 +1776,13 @@
     }
 
     // Let them run for a while
-    env_->SleepForMicroseconds(kTestSeconds * 1000000);
+    DelayMilliseconds(kTestSeconds * 1000);
 
     // Stop the threads and wait for them to finish
     mt.stop.Release_Store(&mt);
     for (int id = 0; id < kNumThreads; id++) {
       while (mt.thread_done[id].Acquire_Load() == NULL) {
-        env_->SleepForMicroseconds(100000);
+        DelayMilliseconds(100);
       }
     }
   } while (ChangeOptions());
diff --git a/db/filename_test.cc b/db/filename_test.cc
index 47353d6..5a26da4 100644
--- a/db/filename_test.cc
+++ b/db/filename_test.cc
@@ -70,7 +70,7 @@
   for (int i = 0; i < sizeof(errors) / sizeof(errors[0]); i++) {
     std::string f = errors[i];
     ASSERT_TRUE(!ParseFileName(f, &number, &type)) << f;
-  };
+  }
 }
 
 TEST(FileNameTest, Construction) {
diff --git a/db/version_set.cc b/db/version_set.cc
index 7d0a5de..4fd1dde 100644
--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -1331,14 +1331,19 @@
   }
 
   // Avoid compacting too much in one shot in case the range is large.
-  const uint64_t limit = MaxFileSizeForLevel(level);
-  uint64_t total = 0;
-  for (size_t i = 0; i < inputs.size(); i++) {
-    uint64_t s = inputs[i]->file_size;
-    total += s;
-    if (total >= limit) {
-      inputs.resize(i + 1);
-      break;
+  // But we cannot do this for level-0 since level-0 files can overlap
+  // and we must not pick one file and drop another older file if the
+  // two files overlap.
+  if (level > 0) {
+    const uint64_t limit = MaxFileSizeForLevel(level);
+    uint64_t total = 0;
+    for (size_t i = 0; i < inputs.size(); i++) {
+      uint64_t s = inputs[i]->file_size;
+      total += s;
+      if (total >= limit) {
+        inputs.resize(i + 1);
+        break;
+      }
     }
   }
 
diff --git a/include/leveldb/db.h b/include/leveldb/db.h
index a37c097..da8b11a 100644
--- a/include/leveldb/db.h
+++ b/include/leveldb/db.h
@@ -14,7 +14,7 @@
 
 // Update Makefile if you change these
 static const int kMajorVersion = 1;
-static const int kMinorVersion = 10;
+static const int kMinorVersion = 12;
 
 struct Options;
 struct ReadOptions;
diff --git a/issues/issue178_test.cc b/issues/issue178_test.cc
new file mode 100644
index 0000000..1b1cf8b
--- /dev/null
+++ b/issues/issue178_test.cc
@@ -0,0 +1,92 @@
+// Copyright (c) 2013 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+// Test for issue 178: a manual compaction causes deleted data to reappear.
+#include <iostream>
+#include <sstream>
+#include <cstdlib>
+
+#include "leveldb/db.h"
+#include "leveldb/write_batch.h"
+#include "util/testharness.h"
+
+namespace {
+
+const int kNumKeys = 1100000;
+
+std::string Key1(int i) {
+  char buf[100];
+  snprintf(buf, sizeof(buf), "my_key_%d", i);
+  return buf;
+}
+
+std::string Key2(int i) {
+  return Key1(i) + "_xxx";
+}
+
+class Issue178 { };
+
+TEST(Issue178, Test) {
+  // Get rid of any state from an old run.
+  std::string dbpath = leveldb::test::TmpDir() + "/leveldb_cbug_test";
+  DestroyDB(dbpath, leveldb::Options());
+
+  // Open database.  Disable compression since it affects the creation
+  // of layers and the code below is trying to test against a very
+  // specific scenario.
+  leveldb::DB* db;
+  leveldb::Options db_options;
+  db_options.create_if_missing = true;
+  db_options.compression = leveldb::kNoCompression;
+  ASSERT_OK(leveldb::DB::Open(db_options, dbpath, &db));
+
+  // create first key range
+  leveldb::WriteBatch batch;
+  for (size_t i = 0; i < kNumKeys; i++) {
+    batch.Put(Key1(i), "value for range 1 key");
+  }
+  ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch));
+
+  // create second key range
+  batch.Clear();
+  for (size_t i = 0; i < kNumKeys; i++) {
+    batch.Put(Key2(i), "value for range 2 key");
+  }
+  ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch));
+
+  // delete second key range
+  batch.Clear();
+  for (size_t i = 0; i < kNumKeys; i++) {
+    batch.Delete(Key2(i));
+  }
+  ASSERT_OK(db->Write(leveldb::WriteOptions(), &batch));
+
+  // compact database
+  std::string start_key = Key1(0);
+  std::string end_key = Key1(kNumKeys - 1);
+  leveldb::Slice least(start_key.data(), start_key.size());
+  leveldb::Slice greatest(end_key.data(), end_key.size());
+
+  // commenting out the line below causes the example to work correctly
+  db->CompactRange(&least, &greatest);
+
+  // count the keys
+  leveldb::Iterator* iter = db->NewIterator(leveldb::ReadOptions());
+  size_t num_keys = 0;
+  for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+    num_keys++;
+  }
+  delete iter;
+  ASSERT_EQ(kNumKeys, num_keys) << "Bad number of keys";
+
+  // close database
+  delete db;
+  DestroyDB(dbpath, leveldb::Options());
+}
+
+}  // anonymous namespace
+
+int main(int argc, char** argv) {
+  return leveldb::test::RunAllTests();
+}
diff --git a/util/coding_test.cc b/util/coding_test.cc
index 2c52b17..fb5726e 100644
--- a/util/coding_test.cc
+++ b/util/coding_test.cc
@@ -109,7 +109,7 @@
     values.push_back(power);
     values.push_back(power-1);
     values.push_back(power+1);
-  };
+  }
 
   std::string s;
   for (int i = 0; i < values.size(); i++) {
diff --git a/util/env_posix.cc b/util/env_posix.cc
index bb58c1d..a3f197d 100644
--- a/util/env_posix.cc
+++ b/util/env_posix.cc
@@ -466,7 +466,7 @@
       result = IOError(fname, errno);
     }
     return result;
-  };
+  }
 
   virtual Status CreateDir(const std::string& name) {
     Status result;
@@ -474,7 +474,7 @@
       result = IOError(name, errno);
     }
     return result;
-  };
+  }
 
   virtual Status DeleteDir(const std::string& name) {
     Status result;
@@ -482,7 +482,7 @@
       result = IOError(name, errno);
     }
     return result;
-  };
+  }
 
   virtual Status GetFileSize(const std::string& fname, uint64_t* size) {
     Status s;