db/db_iter.cc - platform/external/chromium_org/third_party/leveldatabase/src - Git at Google

 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.

 #include "db/db_iter.h"

 #include "db/filename.h"
 #include "db/dbformat.h"
 #include "include/env.h"
 #include "include/iterator.h"
 #include "port/port.h"
 #include "util/logging.h"
 #include "util/mutexlock.h"

 namespace leveldb {

 #if 0
 // Debugging helper (compiled out by the surrounding "#if 0"): walks "iter"
 // from its first entry to its end and prints one line per entry to stderr,
 // either the parsed internal key or an escaped dump of a key that could
 // not be parsed.
 static void DumpInternalIter(Iterator* iter) {
   iter->SeekToFirst();
   while (iter->Valid()) {
     ParsedInternalKey parsed;
     const bool ok = ParseInternalKey(iter->key(), &parsed);
     if (ok) {
       fprintf(stderr, "@ '%s'\n", parsed.DebugString().c_str());
     } else {
       fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
     }
     iter->Next();
   }
 }
 #endif

 namespace {

 // Memtables and sstables that make the DB representation contain
 // (userkey,seq,type) => uservalue entries.  DBIter
 // combines multiple entries for the same userkey found in the DB
 // representation into a single entry while accounting for sequence
 // numbers, deletion markers, overwrites, etc.
 class DBIter: public Iterator {
  public:
   DBIter(const std::string* dbname, Env* env,
          const Comparator* cmp, Iterator* iter, SequenceNumber s)
       : dbname_(dbname),
         env_(env),
         user_comparator_(cmp),
         iter_(iter),
         sequence_(s),
         large_(NULL),
         valid_(false) {
   }
   virtual ~DBIter() {
     delete iter_;
     delete large_;
   }
   virtual bool Valid() const { return valid_; }
   virtual Slice key() const {
     assert(valid_);
     return key_;
   }
   virtual Slice value() const {
     assert(valid_);
     if (large_ == NULL) {
       return value_;
     } else {
       MutexLock l(&large_->mutex);
       if (!large_->produced) {
         ReadIndirectValue();
       }
       return large_->value;
     }
   }

   virtual void Next() {
     assert(valid_);
     // iter_ is already positioned past DBIter::key()
     FindNextUserEntry();
   }

   virtual void Prev() {
     assert(valid_);
     bool ignored;
     ScanUntilBeforeCurrentKey(&ignored);
     FindPrevUserEntry();
   }

   virtual void Seek(const Slice& target) {
     ParsedInternalKey ikey(target, sequence_, kValueTypeForSeek);
     std::string tmp;
     AppendInternalKey(&tmp, ikey);
     iter_->Seek(tmp);
     FindNextUserEntry();
   }
   virtual void SeekToFirst() {
     iter_->SeekToFirst();
     FindNextUserEntry();
   }

   virtual void SeekToLast();

   virtual Status status() const {
     if (status_.ok()) {
       if (large_ != NULL && !large_->status.ok()) return large_->status;
       return iter_->status();
     } else {
       return status_;
     }
   }

  private:
   void FindNextUserEntry();
   void FindPrevUserEntry();
   void SaveKey(const Slice& k) { key_.assign(k.data(), k.size()); }
   void SaveValue(const Slice& v) {
     if (value_.capacity() > v.size() + 1048576) {
       std::string empty;
       swap(empty, value_);
     }
     value_.assign(v.data(), v.size());
   }
   bool ParseKey(ParsedInternalKey* key);
   void SkipPast(const Slice& k);
   void ScanUntilBeforeCurrentKey(bool* found_live);

   void ReadIndirectValue() const;

   struct Large {
     port::Mutex mutex;
     std::string value;
     bool produced;
     Status status;
   };

   const std::string* const dbname_;
   Env* const env_;

   const Comparator* const user_comparator_;

   // iter_ is positioned just past current entry for DBIter if valid_
   Iterator* const iter_;

   SequenceNumber const sequence_;
   Status status_;
   std::string key_;                  // Always a user key
   std::string value_;
   Large* large_;      // Non-NULL if value is an indirect reference
   bool valid_;

   // No copying allowed
   DBIter(const DBIter&);
   void operator=(const DBIter&);
 };

 // Decodes the internal key iter_ currently points at into *ikey.
 // Returns true on success.  On failure records a Corruption error in
 // status_ and returns false.
 inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
   if (ParseInternalKey(iter_->key(), ikey)) {
     return true;
   }
   status_ = Status::Corruption("corrupted internal key in DBIter");
   return false;
 }

 // Scans forward from iter_'s current position for the next user key that
 // has a visible, non-deleted entry.  On success key_/value_ describe that
 // entry, valid_ is true and iter_ rests beyond all entries of that user
 // key.  If the scan runs off the end, valid_ becomes false and key_ and
 // value_ are cleared.
 void DBIter::FindNextUserEntry() {
   // Retire the indirect-value state of the entry being left behind, but
   // keep its load error if no earlier error has been recorded.
   if (large_ != NULL) {
     if (!large_->status.ok() && status_.ok()) {
       status_ = large_->status;
     }
     delete large_;
     large_ = NULL;
   }

   while (iter_->Valid()) {
     ParsedInternalKey ikey;
     // Step over entries that are corrupted or newer than the snapshot.
     if (!ParseKey(&ikey) || ikey.sequence > sequence_) {
       iter_->Next();
       continue;
     }

     switch (ikey.type) {
       case kTypeDeletion:
         // The key is deleted: skip all of its entries and keep looking.
         // key_ holds the local copy that SkipPast() compares against.
         SaveKey(ikey.user_key);
         iter_->Next();
         SkipPast(key_);
         break;

       case kTypeValue:
       case kTypeLargeValueRef:
         SaveKey(ikey.user_key);
         // For a large value, value_ holds only the reference; the real
         // contents are read lazily by value().
         SaveValue(iter_->value());
         if (ikey.type == kTypeLargeValueRef) {
           large_ = new Large;
           large_->produced = false;
         }
         iter_->Next();
         SkipPast(key_);
         // This entry is the one to expose.
         valid_ = true;
         return;
     }
   }

   // Ran off the end without finding a live entry.
   valid_ = false;
   key_.clear();
   value_.clear();
   assert(large_ == NULL);
 }

 // Advances iter_ until it points at a parsable entry whose user key
 // differs from "k", or until iter_ becomes invalid.
 //
 // Unparsable entries never stop the scan.  Given a run such as
 //     <x,100,v> => value100
 //     <corrupted entry for user key x>
 //     <x,50,v> => value50
 // both the corrupted entry and value50 are therefore skipped.
 void DBIter::SkipPast(const Slice& k) {
   for (; iter_->Valid(); iter_->Next()) {
     ParsedInternalKey ikey;
     const bool parsed = ParseKey(&ikey);
     if (parsed && user_comparator_->Compare(ikey.user_key, k) != 0) {
       return;
     }
   }
 }

 void DBIter::SeekToLast() {
   // Position iter_ at the last uncorrupted user key and then
   // let FindPrevUserEntry() do the heavy lifting to find
   // a user key that is live.
   iter_->SeekToLast();
   ParsedInternalKey current;
   while (iter_->Valid() && !ParseKey(&current)) {
     iter_->Prev();
   }
   if (iter_->Valid()) {
     SaveKey(current.user_key);
   }
   FindPrevUserEntry();
 }

 // Let X be the user key at which iter_ is currently positioned.
 // Adjust DBIter to point at the last entry with a key <= X that
 // has a live value.
 //
 // The candidate user key is taken from key_.  On success valid_ is true
 // and key_/value_ describe the entry found.  If iter_ becomes invalid
 // without a live entry having been found, valid_ is set to false and
 // key_ and value_ are cleared.
 void DBIter::FindPrevUserEntry() {
   // Consider the following example:
   //
   //     A@540
   //     A@400
   //
   //     B@300
   //     B@200
   //     B@100        <- iter_
   //
   //     C@301
   //     C@201
   //
   // The comments marked "(first iteration)" below relate what happens
   // for the preceding example in the first iteration of the while loop
   // below.  There may be more than one iteration either if there are
   // no live values for B, or if there is a corruption.
   while (iter_->Valid()) {
     // Remember the candidate key: ScanUntilBeforeCurrentKey() replaces
     // key_ with the earlier user key it stops on.
     std::string saved = key_;
     bool found_live;
     ScanUntilBeforeCurrentKey(&found_live);
     // (first iteration) iter_ at A@400
     if (found_live) {
       // Step forward into range of entries with user key >= saved.
       // If the backward scan ran off the front, restart at the first entry.
       if (!iter_->Valid()) {
         iter_->SeekToFirst();
       } else {
         iter_->Next();
       }
       // (first iteration) iter_ at B@300

       FindNextUserEntry();  // Sets key_ to the key of the next value it found
       if (valid_ && user_comparator_->Compare(key_, saved) == 0) {
         // (first iteration) iter_ at C@301
         return;
       }

       // FindNextUserEntry() could not find any entries under the
       // user key "saved".  This is probably a corruption since
       // ScanUntilBeforeCurrentKey() found a live value.  So we skip
       // backwards to an earlier key and ignore the corrupted
       // entries for "saved".
       //
       // (first iteration) iter_ at C@301 and saved == "B"
       key_ = saved;
       bool ignored;
       ScanUntilBeforeCurrentKey(&ignored);
       // (first iteration) iter_ at A@400
     }
     // If no live value was found for "saved", key_ now holds the earlier
     // user key (when one exists) and the loop tries again with it.
   }
   // iter_ became invalid without a live entry having been returned.
   valid_ = false;
   key_.clear();
   value_.clear();
 }

 void DBIter::ScanUntilBeforeCurrentKey(bool* found_live) {
   *found_live = false;
   if (!iter_->Valid()) {
     iter_->SeekToLast();
   }

   while (iter_->Valid()) {
     ParsedInternalKey current;
     if (!ParseKey(&current)) {
       iter_->Prev();
       continue;
     }

     if (current.sequence > sequence_) {
       // Ignore entries that are serialized after this read
       iter_->Prev();
       continue;
     }

     const int cmp = user_comparator_->Compare(current.user_key, key_);
     if (cmp < 0) {
       SaveKey(current.user_key);
       return;
     } else if (cmp == 0) {
       switch (current.type) {
         case kTypeDeletion:
           *found_live = false;
           break;

         case kTypeValue:
         case kTypeLargeValueRef:
           *found_live = true;
           break;
       }
     } else {  // cmp > 0
       *found_live = false;
     }

     iter_->Prev();
   }
 }

 // Loads the large value referenced by value_ into large_->value.
 //
 // value_ must hold an encoded LargeValueRef; the referenced file is
 // opened through env_, read in full and, if needed, uncompressed.  On any
 // failure large_->value is left empty and the error is stored in
 // large_->status.  large_->produced is set up front so that the load is
 // attempted only once.
 //
 // Called from value() with large_->mutex held.
 void DBIter::ReadIndirectValue() const {
   assert(!large_->produced);
   large_->produced = true;
   LargeValueRef large_ref;
   if (value_.size() != LargeValueRef::ByteSize()) {
     large_->status = Status::Corruption("malformed large value reference");
     return;
   }
   memcpy(large_ref.data, value_.data(), LargeValueRef::ByteSize());
   std::string fname = LargeValueFileName(*dbname_, large_ref);
   RandomAccessFile* file;
   Status s = env_->NewRandomAccessFile(fname, &file);
   if (s.ok()) {
     const uint64_t file_size = file->Size();
     const uint64_t value_size = large_ref.ValueSize();
     // Read into a buffer sized to the file, not to the referenced value
     // size.  The two differ for compressed values (and for corrupted
     // files), and Read() may write up to file_size bytes into the buffer.
     std::string scratch;
     scratch.resize(file_size);
     Slice result;
     s = file->Read(0, file_size, &result,
                    const_cast<char*>(scratch.data()));
     if (s.ok()) {
       if (result.size() != file_size) {
         s = Status::Corruption("Unable to read entire large value file");
       } else {
         switch (large_ref.compression_type()) {
           case kNoCompression: {
             if (file_size != value_size) {
               // An uncompressed file must be exactly as long as the
               // value it is supposed to contain.
               s = Status::Corruption(
                   "large value file size does not match reference");
             } else if (result.data() == scratch.data()) {
               // The data landed in our buffer: hand it over without
               // copying.  "result" is not used after this point.
               swap(scratch, large_->value);
             } else {
               large_->value.assign(result.data(), result.size());
             }
             break;
           }
           case kLightweightCompression: {
             std::string uncompressed;
             if (port::Lightweight_Uncompress(result.data(), result.size(),
                                              &uncompressed) &&
                 uncompressed.size() == value_size) {
               swap(uncompressed, large_->value);
             } else {
               s = Status::Corruption(
                   "Unable to read entire compressed large value file");
             }
             break;
           }
           default: {
             s = Status::Corruption(
                 "unknown compression type in large value reference");
             break;
           }
         }
       }
     }
     delete file;        // Ignore errors on closing
   }
   if (!s.ok()) {
     large_->value.clear();
     large_->status = s;
   }
 }

 }  // anonymous namespace

 // Creates a heap-allocated DBIter over "internal_iter" that exposes only
 // entries with a sequence number <= "sequence".  The DBIter deletes
 // "internal_iter" when it is itself destroyed.
 Iterator* NewDBIterator(
     const std::string* dbname,
     Env* env,
     const Comparator* user_key_comparator,
     Iterator* internal_iter,
     const SequenceNumber& sequence) {
   Iterator* const db_iter =
       new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
   return db_iter;
 }

 }
	// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file. See the AUTHORS file for names of contributors.

	#include "db/db_iter.h"

	#include "db/filename.h"
	#include "db/dbformat.h"
	#include "include/env.h"
	#include "include/iterator.h"
	#include "port/port.h"
	#include "util/logging.h"
	#include "util/mutexlock.h"

	namespace leveldb {

	#if 0
	// Debugging helper (compiled out by the surrounding "#if 0"): walks "iter"
	// from its first entry to its end and prints one line per entry to stderr,
	// either the parsed internal key or an escaped dump of a key that could
	// not be parsed.
	static void DumpInternalIter(Iterator* iter) {
	  iter->SeekToFirst();
	  while (iter->Valid()) {
	    ParsedInternalKey parsed;
	    const bool ok = ParseInternalKey(iter->key(), &parsed);
	    if (ok) {
	      fprintf(stderr, "@ '%s'\n", parsed.DebugString().c_str());
	    } else {
	      fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
	    }
	    iter->Next();
	  }
	}
	#endif

	namespace {

	// Memtables and sstables that make the DB representation contain
	// (userkey,seq,type) => uservalue entries. DBIter
	// combines multiple entries for the same userkey found in the DB
	// representation into a single entry while accounting for sequence
	// numbers, deletion markers, overwrites, etc.
	class DBIter: public Iterator {
	public:
	DBIter(const std::string* dbname, Env* env,
	const Comparator* cmp, Iterator* iter, SequenceNumber s)
	: dbname_(dbname),
	env_(env),
	user_comparator_(cmp),
	iter_(iter),
	sequence_(s),
	large_(NULL),
	valid_(false) {
	}
	virtual ~DBIter() {
	delete iter_;
	delete large_;
	}
	virtual bool Valid() const { return valid_; }
	virtual Slice key() const {
	assert(valid_);
	return key_;
	}
	virtual Slice value() const {
	assert(valid_);
	if (large_ == NULL) {
	return value_;
	} else {
	MutexLock l(&large_->mutex);
	if (!large_->produced) {
	ReadIndirectValue();
	}
	return large_->value;
	}
	}

	virtual void Next() {
	assert(valid_);
	// iter_ is already positioned past DBIter::key()
	FindNextUserEntry();
	}

	virtual void Prev() {
	assert(valid_);
	bool ignored;
	ScanUntilBeforeCurrentKey(&ignored);
	FindPrevUserEntry();
	}

	virtual void Seek(const Slice& target) {
	ParsedInternalKey ikey(target, sequence_, kValueTypeForSeek);
	std::string tmp;
	AppendInternalKey(&tmp, ikey);
	iter_->Seek(tmp);
	FindNextUserEntry();
	}
	virtual void SeekToFirst() {
	iter_->SeekToFirst();
	FindNextUserEntry();
	}

	virtual void SeekToLast();

	virtual Status status() const {
	if (status_.ok()) {
	if (large_ != NULL && !large_->status.ok()) return large_->status;
	return iter_->status();
	} else {
	return status_;
	}
	}

	private:
	void FindNextUserEntry();
	void FindPrevUserEntry();
	void SaveKey(const Slice& k) { key_.assign(k.data(), k.size()); }
	void SaveValue(const Slice& v) {
	if (value_.capacity() > v.size() + 1048576) {
	std::string empty;
	swap(empty, value_);
	}
	value_.assign(v.data(), v.size());
	}
	bool ParseKey(ParsedInternalKey* key);
	void SkipPast(const Slice& k);
	void ScanUntilBeforeCurrentKey(bool* found_live);

	void ReadIndirectValue() const;

	struct Large {
	port::Mutex mutex;
	std::string value;
	bool produced;
	Status status;
	};

	const std::string* const dbname_;
	Env* const env_;

	const Comparator* const user_comparator_;

	// iter_ is positioned just past current entry for DBIter if valid_
	Iterator* const iter_;

	SequenceNumber const sequence_;
	Status status_;
	std::string key_; // Always a user key
	std::string value_;
	Large* large_; // Non-NULL if value is an indirect reference
	bool valid_;

	// No copying allowed
	DBIter(const DBIter&);
	void operator=(const DBIter&);
	};

	// Decodes the internal key iter_ currently points at into *ikey.
	// Returns true on success. On failure records a Corruption error in
	// status_ and returns false.
	inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
	  if (ParseInternalKey(iter_->key(), ikey)) {
	    return true;
	  }
	  status_ = Status::Corruption("corrupted internal key in DBIter");
	  return false;
	}

	// Scans forward from iter_'s current position for the next user key that
	// has a visible, non-deleted entry. On success key_/value_ describe that
	// entry, valid_ is true and iter_ rests beyond all entries of that user
	// key. If the scan runs off the end, valid_ becomes false and key_ and
	// value_ are cleared.
	void DBIter::FindNextUserEntry() {
	  // Retire the indirect-value state of the entry being left behind, but
	  // keep its load error if no earlier error has been recorded.
	  if (large_ != NULL) {
	    if (!large_->status.ok() && status_.ok()) {
	      status_ = large_->status;
	    }
	    delete large_;
	    large_ = NULL;
	  }

	  while (iter_->Valid()) {
	    ParsedInternalKey ikey;
	    // Step over entries that are corrupted or newer than the snapshot.
	    if (!ParseKey(&ikey) || ikey.sequence > sequence_) {
	      iter_->Next();
	      continue;
	    }

	    switch (ikey.type) {
	      case kTypeDeletion:
	        // The key is deleted: skip all of its entries and keep looking.
	        // key_ holds the local copy that SkipPast() compares against.
	        SaveKey(ikey.user_key);
	        iter_->Next();
	        SkipPast(key_);
	        break;

	      case kTypeValue:
	      case kTypeLargeValueRef:
	        SaveKey(ikey.user_key);
	        // For a large value, value_ holds only the reference; the real
	        // contents are read lazily by value().
	        SaveValue(iter_->value());
	        if (ikey.type == kTypeLargeValueRef) {
	          large_ = new Large;
	          large_->produced = false;
	        }
	        iter_->Next();
	        SkipPast(key_);
	        // This entry is the one to expose.
	        valid_ = true;
	        return;
	    }
	  }

	  // Ran off the end without finding a live entry.
	  valid_ = false;
	  key_.clear();
	  value_.clear();
	  assert(large_ == NULL);
	}

	// Advances iter_ until it points at a parsable entry whose user key
	// differs from "k", or until iter_ becomes invalid.
	//
	// Unparsable entries never stop the scan. Given a run such as
	//     <x,100,v> => value100
	//     <corrupted entry for user key x>
	//     <x,50,v> => value50
	// both the corrupted entry and value50 are therefore skipped.
	void DBIter::SkipPast(const Slice& k) {
	  for (; iter_->Valid(); iter_->Next()) {
	    ParsedInternalKey ikey;
	    const bool parsed = ParseKey(&ikey);
	    if (parsed && user_comparator_->Compare(ikey.user_key, k) != 0) {
	      return;
	    }
	  }
	}

	void DBIter::SeekToLast() {
	// Position iter_ at the last uncorrupted user key and then
	// let FindPrevUserEntry() do the heavy lifting to find
	// a user key that is live.
	iter_->SeekToLast();
	ParsedInternalKey current;
	while (iter_->Valid() && !ParseKey(&current)) {
	iter_->Prev();
	}
	if (iter_->Valid()) {
	SaveKey(current.user_key);
	}
	FindPrevUserEntry();
	}

	// Let X be the user key at which iter_ is currently positioned.
	// Adjust DBIter to point at the last entry with a key <= X that
	// has a live value.
	//
	// The candidate user key is taken from key_. On success valid_ is true
	// and key_/value_ describe the entry found. If iter_ becomes invalid
	// without a live entry having been found, valid_ is set to false and
	// key_ and value_ are cleared.
	void DBIter::FindPrevUserEntry() {
	// Consider the following example:
	//
	// A@540
	// A@400
	//
	// B@300
	// B@200
	// B@100 <- iter_
	//
	// C@301
	// C@201
	//
	// The comments marked "(first iteration)" below relate what happens
	// for the preceding example in the first iteration of the while loop
	// below. There may be more than one iteration either if there are
	// no live values for B, or if there is a corruption.
	while (iter_->Valid()) {
	// Remember the candidate key: ScanUntilBeforeCurrentKey() replaces
	// key_ with the earlier user key it stops on.
	std::string saved = key_;
	bool found_live;
	ScanUntilBeforeCurrentKey(&found_live);
	// (first iteration) iter_ at A@400
	if (found_live) {
	// Step forward into range of entries with user key >= saved.
	// If the backward scan ran off the front, restart at the first entry.
	if (!iter_->Valid()) {
	iter_->SeekToFirst();
	} else {
	iter_->Next();
	}
	// (first iteration) iter_ at B@300

	FindNextUserEntry(); // Sets key_ to the key of the next value it found
	if (valid_ && user_comparator_->Compare(key_, saved) == 0) {
	// (first iteration) iter_ at C@301
	return;
	}

	// FindNextUserEntry() could not find any entries under the
	// user key "saved". This is probably a corruption since
	// ScanUntilBeforeCurrentKey() found a live value. So we skip
	// backwards to an earlier key and ignore the corrupted
	// entries for "saved".
	//
	// (first iteration) iter_ at C@301 and saved == "B"
	key_ = saved;
	bool ignored;
	ScanUntilBeforeCurrentKey(&ignored);
	// (first iteration) iter_ at A@400
	}
	// If no live value was found for "saved", key_ now holds the earlier
	// user key (when one exists) and the loop tries again with it.
	}
	// iter_ became invalid without a live entry having been returned.
	valid_ = false;
	key_.clear();
	value_.clear();
	}

	void DBIter::ScanUntilBeforeCurrentKey(bool* found_live) {
	*found_live = false;
	if (!iter_->Valid()) {
	iter_->SeekToLast();
	}

	while (iter_->Valid()) {
	ParsedInternalKey current;
	if (!ParseKey(&current)) {
	iter_->Prev();
	continue;
	}

	if (current.sequence > sequence_) {
	// Ignore entries that are serialized after this read
	iter_->Prev();
	continue;
	}

	const int cmp = user_comparator_->Compare(current.user_key, key_);
	if (cmp < 0) {
	SaveKey(current.user_key);
	return;
	} else if (cmp == 0) {
	switch (current.type) {
	case kTypeDeletion:
	*found_live = false;
	break;

	case kTypeValue:
	case kTypeLargeValueRef:
	*found_live = true;
	break;
	}
	} else { // cmp > 0
	*found_live = false;
	}

	iter_->Prev();
	}
	}

	// Loads the large value referenced by value_ into large_->value.
	//
	// value_ must hold an encoded LargeValueRef; the referenced file is
	// opened through env_, read in full and, if needed, uncompressed. On any
	// failure large_->value is left empty and the error is stored in
	// large_->status. large_->produced is set up front so that the load is
	// attempted only once.
	//
	// Called from value() with large_->mutex held.
	void DBIter::ReadIndirectValue() const {
	  assert(!large_->produced);
	  large_->produced = true;
	  LargeValueRef large_ref;
	  if (value_.size() != LargeValueRef::ByteSize()) {
	    large_->status = Status::Corruption("malformed large value reference");
	    return;
	  }
	  memcpy(large_ref.data, value_.data(), LargeValueRef::ByteSize());
	  std::string fname = LargeValueFileName(*dbname_, large_ref);
	  RandomAccessFile* file;
	  Status s = env_->NewRandomAccessFile(fname, &file);
	  if (s.ok()) {
	    const uint64_t file_size = file->Size();
	    const uint64_t value_size = large_ref.ValueSize();
	    // Read into a buffer sized to the file, not to the referenced value
	    // size. The two differ for compressed values (and for corrupted
	    // files), and Read() may write up to file_size bytes into the buffer.
	    std::string scratch;
	    scratch.resize(file_size);
	    Slice result;
	    s = file->Read(0, file_size, &result,
	                   const_cast<char*>(scratch.data()));
	    if (s.ok()) {
	      if (result.size() != file_size) {
	        s = Status::Corruption("Unable to read entire large value file");
	      } else {
	        switch (large_ref.compression_type()) {
	          case kNoCompression: {
	            if (file_size != value_size) {
	              // An uncompressed file must be exactly as long as the
	              // value it is supposed to contain.
	              s = Status::Corruption(
	                  "large value file size does not match reference");
	            } else if (result.data() == scratch.data()) {
	              // The data landed in our buffer: hand it over without
	              // copying. "result" is not used after this point.
	              swap(scratch, large_->value);
	            } else {
	              large_->value.assign(result.data(), result.size());
	            }
	            break;
	          }
	          case kLightweightCompression: {
	            std::string uncompressed;
	            if (port::Lightweight_Uncompress(result.data(), result.size(),
	                                             &uncompressed) &&
	                uncompressed.size() == value_size) {
	              swap(uncompressed, large_->value);
	            } else {
	              s = Status::Corruption(
	                  "Unable to read entire compressed large value file");
	            }
	            break;
	          }
	          default: {
	            s = Status::Corruption(
	                "unknown compression type in large value reference");
	            break;
	          }
	        }
	      }
	    }
	    delete file; // Ignore errors on closing
	  }
	  if (!s.ok()) {
	    large_->value.clear();
	    large_->status = s;
	  }
	}

	} // anonymous namespace

	// Creates a heap-allocated DBIter over "internal_iter" that exposes only
	// entries with a sequence number <= "sequence". The DBIter deletes
	// "internal_iter" when it is itself destroyed.
	Iterator* NewDBIterator(
	    const std::string* dbname,
	    Env* env,
	    const Comparator* user_key_comparator,
	    Iterator* internal_iter,
	    const SequenceNumber& sequence) {
	  Iterator* const db_iter =
	      new DBIter(dbname, env, user_key_comparator, internal_iter, sequence);
	  return db_iter;
	}

	}