Merge
diff --git a/src/share/vm/classfile/classLoaderData.cpp b/src/share/vm/classfile/classLoaderData.cpp
index 7a64dc4..380c3de 100644
--- a/src/share/vm/classfile/classLoaderData.cpp
+++ b/src/share/vm/classfile/classLoaderData.cpp
@@ -734,7 +734,7 @@
 
 // Move class loader data from main list to the unloaded list for unloading
 // and deallocation later.
-bool ClassLoaderDataGraph::do_unloading(BoolObjectClosure* is_alive_closure) {
+bool ClassLoaderDataGraph::do_unloading(BoolObjectClosure* is_alive_closure, bool clean_alive) {
   ClassLoaderData* data = _head;
   ClassLoaderData* prev = NULL;
   bool seen_dead_loader = false;
@@ -743,27 +743,9 @@
   // purging and we don't want to rewalk the previously unloaded class loader data.
   _saved_unloading = _unloading;
 
-  // mark metadata seen on the stack and code cache so we can delete
-  // unneeded entries.
-  bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
-  MetadataOnStackMark md_on_stack(has_redefined_a_class);
-  if (has_redefined_a_class) {
-    // purge_previous_versions also cleans weak method links. Because
-    // one method's MDO can reference another method from another
-    // class loader, we need to first clean weak method links for all
-    // class loaders here. Below, we can then free redefined methods
-    // for all class loaders.
-    while (data != NULL) {
-      if (data->is_alive(is_alive_closure)) {
-        data->classes_do(InstanceKlass::purge_previous_versions);
-      }
-      data = data->next();
-    }
-  }
   data = _head;
   while (data != NULL) {
     if (data->is_alive(is_alive_closure)) {
-      data->free_deallocate_list();
       prev = data;
       data = data->next();
       continue;
@@ -785,6 +767,11 @@
     _unloading = dead;
   }
 
+  if (clean_alive) {
+    // Clean previous versions and the deallocate list.
+    ClassLoaderDataGraph::clean_metaspaces();
+  }
+
   if (seen_dead_loader) {
     post_class_unload_events();
   }
@@ -792,6 +779,26 @@
   return seen_dead_loader;
 }
 
+void ClassLoaderDataGraph::clean_metaspaces() {
+  // mark metadata seen on the stack and code cache so we can delete unneeded entries.
+  bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
+  MetadataOnStackMark md_on_stack(has_redefined_a_class);
+
+  if (has_redefined_a_class) {
+    // purge_previous_versions also cleans weak method links. Because
+    // one method's MDO can reference another method from another
+    // class loader, we need to first clean weak method links for all
+    // class loaders here. Below, we can then free redefined methods
+    // for all class loaders.
+    for (ClassLoaderData* data = _head; data != NULL; data = data->next()) {
+      data->classes_do(InstanceKlass::purge_previous_versions);
+    }
+  }
+
+  // Need to purge the previous version before deallocating.
+  free_deallocate_lists();
+}
+
 void ClassLoaderDataGraph::purge() {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint!");
   ClassLoaderData* list = _unloading;
@@ -819,6 +826,14 @@
 #endif
 }
 
+void ClassLoaderDataGraph::free_deallocate_lists() {
+  for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
+    // We need to keep this data until InstanceKlass::purge_previous_versions has been
+    // called on all alive classes. See the comment in ClassLoaderDataGraph::clean_metaspaces.
+    cld->free_deallocate_list();
+  }
+}
+
 // CDS support
 
 // Global metaspaces for writing information to the shared archive.  When
diff --git a/src/share/vm/classfile/classLoaderData.hpp b/src/share/vm/classfile/classLoaderData.hpp
index f9c638c..231c135 100644
--- a/src/share/vm/classfile/classLoaderData.hpp
+++ b/src/share/vm/classfile/classLoaderData.hpp
@@ -71,6 +71,7 @@
 
   static ClassLoaderData* add(Handle class_loader, bool anonymous, TRAPS);
   static void post_class_unload_events(void);
+  static void clean_metaspaces();
  public:
   static ClassLoaderData* find_or_create(Handle class_loader, TRAPS);
   static void purge();
@@ -90,7 +91,7 @@
   static void methods_do(void f(Method*));
   static void loaded_classes_do(KlassClosure* klass_closure);
   static void classes_unloading_do(void f(Klass* const));
-  static bool do_unloading(BoolObjectClosure* is_alive);
+  static bool do_unloading(BoolObjectClosure* is_alive, bool clean_alive);
 
   // CMS support.
   static void remember_new_clds(bool remember) { _saved_head = (remember ? _head : NULL); }
@@ -106,6 +107,8 @@
     }
   }
 
+  static void free_deallocate_lists();
+
   static void dump_on(outputStream * const out) PRODUCT_RETURN;
   static void dump() { dump_on(tty); }
   static void verify();
diff --git a/src/share/vm/classfile/metadataOnStackMark.cpp b/src/share/vm/classfile/metadataOnStackMark.cpp
index e9eebb6..18098b9 100644
--- a/src/share/vm/classfile/metadataOnStackMark.cpp
+++ b/src/share/vm/classfile/metadataOnStackMark.cpp
@@ -31,25 +31,23 @@
 #include "runtime/synchronizer.hpp"
 #include "runtime/thread.hpp"
 #include "services/threadService.hpp"
-#include "utilities/growableArray.hpp"
+#include "utilities/chunkedList.hpp"
 
+volatile MetadataOnStackBuffer* MetadataOnStackMark::_used_buffers = NULL;
+volatile MetadataOnStackBuffer* MetadataOnStackMark::_free_buffers = NULL;
 
-// Keep track of marked on-stack metadata so it can be cleared.
-GrowableArray<Metadata*>* _marked_objects = NULL;
 NOT_PRODUCT(bool MetadataOnStackMark::_is_active = false;)
 
 // Walk metadata on the stack and mark it so that redefinition doesn't delete
 // it.  Class unloading also walks the previous versions and might try to
 // delete it, so this class is used by class unloading also.
-MetadataOnStackMark::MetadataOnStackMark(bool has_redefined_a_class) {
+MetadataOnStackMark::MetadataOnStackMark(bool visit_code_cache) {
   assert(SafepointSynchronize::is_at_safepoint(), "sanity check");
+  assert(_used_buffers == NULL, "sanity check");
   NOT_PRODUCT(_is_active = true;)
-  if (_marked_objects == NULL) {
-    _marked_objects = new (ResourceObj::C_HEAP, mtClass) GrowableArray<Metadata*>(1000, true);
-  }
 
   Threads::metadata_do(Metadata::mark_on_stack);
-  if (has_redefined_a_class) {
+  if (visit_code_cache) {
     CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
   }
   CompileBroker::mark_on_stack();
@@ -62,15 +60,93 @@
   // Unmark everything that was marked.   Can't do the same walk because
   // redefine classes messes up the code cache so the set of methods
   // might not be the same.
-  for (int i = 0; i< _marked_objects->length(); i++) {
-    _marked_objects->at(i)->set_on_stack(false);
+
+  retire_buffer_for_thread(Thread::current());
+
+  MetadataOnStackBuffer* buffer = const_cast<MetadataOnStackBuffer*>(_used_buffers);
+  while (buffer != NULL) {
+    // Clear on stack state for all metadata.
+    size_t size = buffer->size();
+    for (size_t i = 0; i < size; i++) {
+      Metadata* md = buffer->at(i);
+      md->set_on_stack(false);
+    }
+
+    MetadataOnStackBuffer* next = buffer->next_used();
+
+    // Move the buffer to the free list.
+    buffer->clear();
+    buffer->set_next_used(NULL);
+    buffer->set_next_free(const_cast<MetadataOnStackBuffer*>(_free_buffers));
+    _free_buffers = buffer;
+
+    // Step to next used buffer.
+    buffer = next;
   }
-  _marked_objects->clear();   // reuse growable array for next time.
+
+  _used_buffers = NULL;
+
   NOT_PRODUCT(_is_active = false;)
 }
 
+void MetadataOnStackMark::retire_buffer(MetadataOnStackBuffer* buffer) {
+  if (buffer == NULL) {
+    return;
+  }
+
+  MetadataOnStackBuffer* old_head;
+
+  do {
+    old_head = const_cast<MetadataOnStackBuffer*>(_used_buffers);
+    buffer->set_next_used(old_head);
+  } while (Atomic::cmpxchg_ptr(buffer, &_used_buffers, old_head) != old_head);
+}
+
+void MetadataOnStackMark::retire_buffer_for_thread(Thread* thread) {
+  retire_buffer(thread->metadata_on_stack_buffer());
+  thread->set_metadata_on_stack_buffer(NULL);
+}
+
+bool MetadataOnStackMark::has_buffer_for_thread(Thread* thread) {
+  return thread->metadata_on_stack_buffer() != NULL;
+}
+
+MetadataOnStackBuffer* MetadataOnStackMark::allocate_buffer() {
+  MetadataOnStackBuffer* allocated;
+  MetadataOnStackBuffer* new_head;
+
+  do {
+    allocated = const_cast<MetadataOnStackBuffer*>(_free_buffers);
+    if (allocated == NULL) {
+      break;
+    }
+    new_head = allocated->next_free();
+  } while (Atomic::cmpxchg_ptr(new_head, &_free_buffers, allocated) != allocated);
+
+  if (allocated == NULL) {
+    allocated = new MetadataOnStackBuffer();
+  }
+
+  assert(!allocated->is_full(), err_msg("Should not be full: " PTR_FORMAT, p2i(allocated)));
+
+  return allocated;
+}
+
 // Record which objects are marked so we can unmark the same objects.
-void MetadataOnStackMark::record(Metadata* m) {
+void MetadataOnStackMark::record(Metadata* m, Thread* thread) {
   assert(_is_active, "metadata on stack marking is active");
-  _marked_objects->push(m);
+
+  MetadataOnStackBuffer* buffer = thread->metadata_on_stack_buffer();
+
+  if (buffer != NULL && buffer->is_full()) {
+    retire_buffer(buffer);
+    buffer = NULL;
+  }
+
+  if (buffer == NULL) {
+    buffer = allocate_buffer();
+    thread->set_metadata_on_stack_buffer(buffer);
+  }
+
+  buffer->push(m);
 }
diff --git a/src/share/vm/classfile/metadataOnStackMark.hpp b/src/share/vm/classfile/metadataOnStackMark.hpp
index ae43133..01fff29 100644
--- a/src/share/vm/classfile/metadataOnStackMark.hpp
+++ b/src/share/vm/classfile/metadataOnStackMark.hpp
@@ -26,9 +26,12 @@
 #define SHARE_VM_CLASSFILE_METADATAONSTACKMARK_HPP
 
 #include "memory/allocation.hpp"
+#include "utilities/chunkedList.hpp"
 
 class Metadata;
 
+typedef ChunkedList<Metadata*, mtInternal> MetadataOnStackBuffer;
+
 // Helper class to mark and unmark metadata used on the stack as either handles
 // or executing methods, so that it can't be deleted during class redefinition
 // and class unloading.
@@ -36,10 +39,20 @@
 // metadata during parsing, relocated methods, and methods in backtraces.
 class MetadataOnStackMark : public StackObj {
   NOT_PRODUCT(static bool _is_active;)
+
+  static volatile MetadataOnStackBuffer* _used_buffers;
+  static volatile MetadataOnStackBuffer* _free_buffers;
+
+  static MetadataOnStackBuffer* allocate_buffer();
+  static void retire_buffer(MetadataOnStackBuffer* buffer);
+
  public:
-  MetadataOnStackMark(bool has_redefined_a_class);
-  ~MetadataOnStackMark();
-  static void record(Metadata* m);
+  MetadataOnStackMark(bool visit_code_cache);
+  ~MetadataOnStackMark();
+
+  static void record(Metadata* m, Thread* thread);
+  static void retire_buffer_for_thread(Thread* thread);
+  static bool has_buffer_for_thread(Thread* thread);
 };
 
 #endif // SHARE_VM_CLASSFILE_METADATAONSTACKMARK_HPP
diff --git a/src/share/vm/classfile/systemDictionary.cpp b/src/share/vm/classfile/systemDictionary.cpp
index 793073a..85f90d4 100644
--- a/src/share/vm/classfile/systemDictionary.cpp
+++ b/src/share/vm/classfile/systemDictionary.cpp
@@ -1690,9 +1690,9 @@
 
 // Assumes classes in the SystemDictionary are only unloaded at a safepoint
 // Note: anonymous classes are not in the SD.
-bool SystemDictionary::do_unloading(BoolObjectClosure* is_alive) {
+bool SystemDictionary::do_unloading(BoolObjectClosure* is_alive, bool clean_alive) {
   // First, mark for unload all ClassLoaderData referencing a dead class loader.
-  bool unloading_occurred = ClassLoaderDataGraph::do_unloading(is_alive);
+  bool unloading_occurred = ClassLoaderDataGraph::do_unloading(is_alive, clean_alive);
   if (unloading_occurred) {
     dictionary()->do_unloading();
     constraints()->purge_loader_constraints();
diff --git a/src/share/vm/classfile/systemDictionary.hpp b/src/share/vm/classfile/systemDictionary.hpp
index 33ecc83..fd371bf 100644
--- a/src/share/vm/classfile/systemDictionary.hpp
+++ b/src/share/vm/classfile/systemDictionary.hpp
@@ -334,7 +334,7 @@
 
   // Unload (that is, break root links to) all unmarked classes and
   // loaders.  Returns "true" iff something was unloaded.
-  static bool do_unloading(BoolObjectClosure* is_alive);
+  static bool do_unloading(BoolObjectClosure* is_alive, bool clean_alive = true);
 
   // Used by DumpSharedSpaces only to remove classes that failed verification
   static void remove_classes_in_error_state();
diff --git a/src/share/vm/code/nmethod.cpp b/src/share/vm/code/nmethod.cpp
index 7804712..278ba99 100644
--- a/src/share/vm/code/nmethod.cpp
+++ b/src/share/vm/code/nmethod.cpp
@@ -1700,11 +1700,17 @@
   set_unload_reported();
 }
 
-void static clean_ic_if_metadata_is_dead(CompiledIC *ic, BoolObjectClosure *is_alive) {
+void static clean_ic_if_metadata_is_dead(CompiledIC *ic, BoolObjectClosure *is_alive, bool mark_on_stack) {
   if (ic->is_icholder_call()) {
     // The only exception is compiledICHolder oops which may
     // yet be marked below. (We check this further below).
     CompiledICHolder* cichk_oop = ic->cached_icholder();
+
+    if (mark_on_stack) {
+      Metadata::mark_on_stack(cichk_oop->holder_method());
+      Metadata::mark_on_stack(cichk_oop->holder_klass());
+    }
+
     if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
         cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
       return;
@@ -1712,6 +1718,10 @@
   } else {
     Metadata* ic_oop = ic->cached_metadata();
     if (ic_oop != NULL) {
+      if (mark_on_stack) {
+        Metadata::mark_on_stack(ic_oop);
+      }
+
       if (ic_oop->is_klass()) {
         if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
           return;
@@ -1772,7 +1782,7 @@
     while(iter.next()) {
       if (iter.type() == relocInfo::virtual_call_type) {
         CompiledIC *ic = CompiledIC_at(&iter);
-        clean_ic_if_metadata_is_dead(ic, is_alive);
+        clean_ic_if_metadata_is_dead(ic, is_alive, false);
       }
     }
   }
@@ -1840,6 +1850,53 @@
   return clean_if_nmethod_is_unloaded(csc, csc->destination(), is_alive, from);
 }
 
+bool nmethod::unload_if_dead_at(RelocIterator* iter_at_oop, BoolObjectClosure *is_alive, bool unloading_occurred) {
+  assert(iter_at_oop->type() == relocInfo::oop_type, "Wrong relocation type");
+
+  oop_Relocation* r = iter_at_oop->oop_reloc();
+  // Traverse those oops directly embedded in the code.
+  // Other oops (oop_index>0) are seen as part of scopes_oops.
+  assert(1 == (r->oop_is_immediate()) +
+         (r->oop_addr() >= oops_begin() && r->oop_addr() < oops_end()),
+         "oop must be found in exactly one place");
+  if (r->oop_is_immediate() && r->oop_value() != NULL) {
+    // Unload this nmethod if the oop is dead.
+    if (can_unload(is_alive, r->oop_addr(), unloading_occurred)) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void nmethod::mark_metadata_on_stack_at(RelocIterator* iter_at_metadata) {
+  assert(iter_at_metadata->type() == relocInfo::metadata_type, "Wrong relocation type");
+
+  metadata_Relocation* r = iter_at_metadata->metadata_reloc();
+  // In this metadata, we must only follow those metadatas directly embedded in
+  // the code.  Other metadatas (oop_index>0) are seen as part of
+  // the metadata section below.
+  assert(1 == (r->metadata_is_immediate()) +
+         (r->metadata_addr() >= metadata_begin() && r->metadata_addr() < metadata_end()),
+         "metadata must be found in exactly one place");
+  if (r->metadata_is_immediate() && r->metadata_value() != NULL) {
+    Metadata* md = r->metadata_value();
+    if (md != _method) Metadata::mark_on_stack(md);
+  }
+}
+
+void nmethod::mark_metadata_on_stack_non_relocs() {
+  // Visit the metadata section
+  for (Metadata** p = metadata_begin(); p < metadata_end(); p++) {
+    if (*p == Universe::non_oop_word() || *p == NULL)  continue;  // skip non-oops
+    Metadata* md = *p;
+    Metadata::mark_on_stack(md);
+  }
+
+  // Visit metadata not embedded in the other places.
+  if (_method != NULL) Metadata::mark_on_stack(_method);
+}
+
 bool nmethod::do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred) {
   ResourceMark rm;
 
@@ -1869,6 +1926,11 @@
     unloading_occurred = true;
   }
 
+  // When class redefinition is used, all metadata in the CodeCache has to be recorded,
+  // so that unused "previous versions" can be purged. Since walking the CodeCache can
+  // be expensive, the "mark on stack" is piggy-backed on this parallel unloading code.
+  bool mark_metadata_on_stack = a_class_was_redefined;
+
   // Exception cache
   clean_exception_cache(is_alive);
 
@@ -1884,7 +1946,7 @@
       if (unloading_occurred) {
         // If class unloading occurred we first iterate over all inline caches and
         // clear ICs where the cached oop is referring to an unloaded klass or method.
-        clean_ic_if_metadata_is_dead(CompiledIC_at(&iter), is_alive);
+        clean_ic_if_metadata_is_dead(CompiledIC_at(&iter), is_alive, mark_metadata_on_stack);
       }
 
       postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
@@ -1900,24 +1962,21 @@
 
     case relocInfo::oop_type:
       if (!is_unloaded) {
-        // Unload check
-        oop_Relocation* r = iter.oop_reloc();
-        // Traverse those oops directly embedded in the code.
-        // Other oops (oop_index>0) are seen as part of scopes_oops.
-        assert(1 == (r->oop_is_immediate()) +
-                  (r->oop_addr() >= oops_begin() && r->oop_addr() < oops_end()),
-              "oop must be found in exactly one place");
-        if (r->oop_is_immediate() && r->oop_value() != NULL) {
-          if (can_unload(is_alive, r->oop_addr(), unloading_occurred)) {
-            is_unloaded = true;
-          }
-        }
+        is_unloaded = unload_if_dead_at(&iter, is_alive, unloading_occurred);
       }
       break;
 
+    case relocInfo::metadata_type:
+      if (mark_metadata_on_stack) {
+        mark_metadata_on_stack_at(&iter);
+      }
     }
   }
 
+  if (mark_metadata_on_stack) {
+    mark_metadata_on_stack_non_relocs();
+  }
+
   if (is_unloaded) {
     return postponed;
   }
@@ -2065,7 +2124,7 @@
     while (iter.next()) {
       if (iter.type() == relocInfo::metadata_type ) {
         metadata_Relocation* r = iter.metadata_reloc();
-        // In this lmetadata, we must only follow those metadatas directly embedded in
+        // In this metadata, we must only follow those metadatas directly embedded in
         // the code.  Other metadatas (oop_index>0) are seen as part of
         // the metadata section below.
         assert(1 == (r->metadata_is_immediate()) +
@@ -2099,7 +2158,7 @@
     f(md);
   }
 
-  // Call function Method*, not embedded in these other places.
+  // Visit metadata not embedded in the other places.
   if (_method != NULL) f(_method);
 }
 
diff --git a/src/share/vm/code/nmethod.hpp b/src/share/vm/code/nmethod.hpp
index 486e149..c956796 100644
--- a/src/share/vm/code/nmethod.hpp
+++ b/src/share/vm/code/nmethod.hpp
@@ -615,9 +615,16 @@
   //  The parallel versions are used by G1.
   bool do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred);
   void do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred);
+
+ private:
   //  Unload a nmethod if the *root object is dead.
   bool can_unload(BoolObjectClosure* is_alive, oop* root, bool unloading_occurred);
+  bool unload_if_dead_at(RelocIterator *iter_at_oop, BoolObjectClosure* is_alive, bool unloading_occurred);
 
+  void mark_metadata_on_stack_at(RelocIterator* iter_at_metadata);
+  void mark_metadata_on_stack_non_relocs();
+
+ public:
   void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map,
                                      OopClosure* f);
   void oops_do(OopClosure* f) { oops_do(f, false); }
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp
index 03711e3..bb2cb5a 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp
@@ -64,23 +64,6 @@
   set_hint(hint);
 }
 
-#ifndef PRODUCT
-template <class Chunk>
-void AdaptiveFreeList<Chunk>::assert_proper_lock_protection_work() const {
-  assert(protecting_lock() != NULL, "Don't call this directly");
-  assert(ParallelGCThreads > 0, "Don't call this directly");
-  Thread* thr = Thread::current();
-  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
-    // assert that we are holding the freelist lock
-  } else if (thr->is_GC_task_thread()) {
-    assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
-  } else if (thr->is_Java_thread()) {
-    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
-  } else {
-    ShouldNotReachHere();  // unaccounted thread type?
-  }
-}
-#endif
 template <class Chunk>
 void AdaptiveFreeList<Chunk>::init_statistics(bool split_birth) {
   _allocation_stats.initialize(split_birth);
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp
index b628cd7..f2ae38f 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp
@@ -81,8 +81,6 @@
   // Reset the head, tail, hint, and count of a free list.
   void reset(size_t hint);
 
-  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
-
   void print_on(outputStream* st, const char* c = NULL) const;
 
   size_t hint() const {
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
index 460692b..1406d5d 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@@ -149,18 +149,15 @@
   check_free_list_consistency();
 
   // Initialize locks for parallel case.
-
-  if (CollectedHeap::use_parallel_gc_threads()) {
-    for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-      _indexedFreeListParLocks[i] = new Mutex(Mutex::leaf - 1, // == ExpandHeap_lock - 1
-                                              "a freelist par lock",
-                                              true);
-      DEBUG_ONLY(
-        _indexedFreeList[i].set_protecting_lock(_indexedFreeListParLocks[i]);
-      )
-    }
-    _dictionary->set_par_lock(&_parDictionaryAllocLock);
+  for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
+    _indexedFreeListParLocks[i] = new Mutex(Mutex::leaf - 1, // == ExpandHeap_lock - 1
+                                            "a freelist par lock",
+                                            true);
+    DEBUG_ONLY(
+      _indexedFreeList[i].set_protecting_lock(_indexedFreeListParLocks[i]);
+    )
   }
+  _dictionary->set_par_lock(&_parDictionaryAllocLock);
 }
 
 // Like CompactibleSpace forward() but always calls cross_threshold() to
@@ -622,17 +619,11 @@
         // Mark the boundary of the new block in BOT
         _bt.mark_block(prevEnd, value);
         // put it all in the linAB
-        if (ParallelGCThreads == 0) {
-          _smallLinearAllocBlock._ptr = prevEnd;
-          _smallLinearAllocBlock._word_size = newFcSize;
-          repairLinearAllocBlock(&_smallLinearAllocBlock);
-        } else { // ParallelGCThreads > 0
-          MutexLockerEx x(parDictionaryAllocLock(),
-                          Mutex::_no_safepoint_check_flag);
-          _smallLinearAllocBlock._ptr = prevEnd;
-          _smallLinearAllocBlock._word_size = newFcSize;
-          repairLinearAllocBlock(&_smallLinearAllocBlock);
-        }
+        MutexLockerEx x(parDictionaryAllocLock(),
+                        Mutex::_no_safepoint_check_flag);
+        _smallLinearAllocBlock._ptr = prevEnd;
+        _smallLinearAllocBlock._word_size = newFcSize;
+        repairLinearAllocBlock(&_smallLinearAllocBlock);
         // Births of chunks put into a LinAB are not recorded.  Births
         // of chunks as they are allocated out of a LinAB are.
       } else {
@@ -1740,10 +1731,7 @@
   assert(chunk != NULL && is_in_reserved(chunk), "Not in this space!");
   // One of the parallel gc task threads may be here
   // whilst others are allocating.
-  Mutex* lock = NULL;
-  if (ParallelGCThreads != 0) {
-    lock = &_parDictionaryAllocLock;
-  }
+  Mutex* lock = &_parDictionaryAllocLock;
   FreeChunk* ec;
   {
     MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
@@ -1760,7 +1748,7 @@
   }
   ec->set_size(size);
   debug_only(ec->mangleFreed(size));
-  if (size < SmallForDictionary && ParallelGCThreads != 0) {
+  if (size < SmallForDictionary) {
     lock = _indexedFreeListParLocks[size];
   }
   MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
diff --git a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
index 55b8301..bc9547d 100644
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@@ -887,10 +887,8 @@
   // along with all the other pointers into the heap but
   // compaction is expected to be a rare event with
   // a heap using cms so don't do it without seeing the need.
-  if (CollectedHeap::use_parallel_gc_threads()) {
-    for (uint i = 0; i < ParallelGCThreads; i++) {
-      _par_gc_thread_states[i]->promo.reset();
-    }
+  for (uint i = 0; i < ParallelGCThreads; i++) {
+    _par_gc_thread_states[i]->promo.reset();
   }
 }
 
@@ -2804,10 +2802,8 @@
   collector()->gc_epilogue(full);
 
   // Also reset promotion tracking in par gc thread states.
-  if (CollectedHeap::use_parallel_gc_threads()) {
-    for (uint i = 0; i < ParallelGCThreads; i++) {
-      _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
-    }
+  for (uint i = 0; i < ParallelGCThreads; i++) {
+    _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
   }
 }
 
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
index 3646241..eeb110c 100644
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "classfile/metadataOnStackMark.hpp"
 #include "classfile/symbolTable.hpp"
 #include "code/codeCache.hpp"
 #include "gc_implementation/g1/concurrentMark.inline.hpp"
@@ -611,74 +612,64 @@
             ConcGCThreads, ParallelGCThreads);
     return;
   }
-  if (ParallelGCThreads == 0) {
-    // if we are not running with any parallel GC threads we will not
-    // spawn any marking threads either
-    _parallel_marking_threads =       0;
-    _max_parallel_marking_threads =   0;
-    _sleep_factor             =     0.0;
-    _marking_task_overhead    =     1.0;
+  if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
+    // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
+    // if both are set
+    _sleep_factor             = 0.0;
+    _marking_task_overhead    = 1.0;
+  } else if (G1MarkingOverheadPercent > 0) {
+    // We will calculate the number of parallel marking threads based
+    // on a target overhead with respect to the soft real-time goal
+    double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
+    double overall_cm_overhead =
+      (double) MaxGCPauseMillis * marking_overhead /
+      (double) GCPauseIntervalMillis;
+    double cpu_ratio = 1.0 / (double) os::processor_count();
+    double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
+    double marking_task_overhead =
+      overall_cm_overhead / marking_thread_num *
+                                              (double) os::processor_count();
+    double sleep_factor =
+                       (1.0 - marking_task_overhead) / marking_task_overhead;
+
+    FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
+    _sleep_factor             = sleep_factor;
+    _marking_task_overhead    = marking_task_overhead;
   } else {
-    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
-      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
-      // if both are set
-      _sleep_factor             = 0.0;
-      _marking_task_overhead    = 1.0;
-    } else if (G1MarkingOverheadPercent > 0) {
-      // We will calculate the number of parallel marking threads based
-      // on a target overhead with respect to the soft real-time goal
-      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
-      double overall_cm_overhead =
-        (double) MaxGCPauseMillis * marking_overhead /
-        (double) GCPauseIntervalMillis;
-      double cpu_ratio = 1.0 / (double) os::processor_count();
-      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
-      double marking_task_overhead =
-        overall_cm_overhead / marking_thread_num *
-                                                (double) os::processor_count();
-      double sleep_factor =
-                         (1.0 - marking_task_overhead) / marking_task_overhead;
+    // Calculate the number of parallel marking threads by scaling
+    // the number of parallel GC threads.
+    uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
+    FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
+    _sleep_factor             = 0.0;
+    _marking_task_overhead    = 1.0;
+  }
 
-      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
-      _sleep_factor             = sleep_factor;
-      _marking_task_overhead    = marking_task_overhead;
-    } else {
-      // Calculate the number of parallel marking threads by scaling
-      // the number of parallel GC threads.
-      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
-      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
-      _sleep_factor             = 0.0;
-      _marking_task_overhead    = 1.0;
-    }
+  assert(ConcGCThreads > 0, "Should have been set");
+  _parallel_marking_threads = (uint) ConcGCThreads;
+  _max_parallel_marking_threads = _parallel_marking_threads;
 
-    assert(ConcGCThreads > 0, "Should have been set");
-    _parallel_marking_threads = (uint) ConcGCThreads;
-    _max_parallel_marking_threads = _parallel_marking_threads;
-
-    if (parallel_marking_threads() > 1) {
-      _cleanup_task_overhead = 1.0;
-    } else {
-      _cleanup_task_overhead = marking_task_overhead();
-    }
-    _cleanup_sleep_factor =
-                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
+  if (parallel_marking_threads() > 1) {
+    _cleanup_task_overhead = 1.0;
+  } else {
+    _cleanup_task_overhead = marking_task_overhead();
+  }
+  _cleanup_sleep_factor =
+                   (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 
 #if 0
-    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
-    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
-    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
-    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
-    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
+  gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
+  gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
+  gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
+  gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
+  gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 #endif
 
-    guarantee(parallel_marking_threads() > 0, "peace of mind");
-    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
-         _max_parallel_marking_threads, false, true);
-    if (_parallel_workers == NULL) {
-      vm_exit_during_initialization("Failed necessary allocation.");
-    } else {
-      _parallel_workers->initialize_workers();
-    }
+  _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
+       _max_parallel_marking_threads, false, true);
+  if (_parallel_workers == NULL) {
+    vm_exit_during_initialization("Failed necessary allocation.");
+  } else {
+    _parallel_workers->initialize_workers();
   }
 
   if (FLAG_IS_DEFAULT(MarkStackSize)) {
@@ -1166,29 +1157,23 @@
 // Calculates the number of active workers for a concurrent
 // phase.
 uint ConcurrentMark::calc_parallel_marking_threads() {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    uint n_conc_workers = 0;
-    if (!UseDynamicNumberOfGCThreads ||
-        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
-         !ForceDynamicNumberOfGCThreads)) {
-      n_conc_workers = max_parallel_marking_threads();
-    } else {
-      n_conc_workers =
-        AdaptiveSizePolicy::calc_default_active_workers(
-                                     max_parallel_marking_threads(),
-                                     1, /* Minimum workers */
-                                     parallel_marking_threads(),
-                                     Threads::number_of_non_daemon_threads());
-      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
-      // that scaling has already gone into "_max_parallel_marking_threads".
-    }
-    assert(n_conc_workers > 0, "Always need at least 1");
-    return n_conc_workers;
+  uint n_conc_workers = 0;
+  if (!UseDynamicNumberOfGCThreads ||
+      (!FLAG_IS_DEFAULT(ConcGCThreads) &&
+       !ForceDynamicNumberOfGCThreads)) {
+    n_conc_workers = max_parallel_marking_threads();
+  } else {
+    n_conc_workers =
+      AdaptiveSizePolicy::calc_default_active_workers(
+                                   max_parallel_marking_threads(),
+                                   1, /* Minimum workers */
+                                   parallel_marking_threads(),
+                                   Threads::number_of_non_daemon_threads());
+    // Don't scale down "n_conc_workers" by scale_parallel_threads() because
+    // that scaling has already gone into "_max_parallel_marking_threads".
   }
-  // If we are not running with any parallel GC threads we will not
-  // have spawned any marking threads either. Hence the number of
-  // concurrent workers should be 0.
-  return 0;
+  assert(n_conc_workers > 0, "Always need at least 1");
+  return n_conc_workers;
 }
 
 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
@@ -1243,12 +1228,8 @@
     uint active_workers = MAX2(1U, parallel_marking_threads());
 
     CMRootRegionScanTask task(this);
-    if (use_parallel_marking_threads()) {
-      _parallel_workers->set_active_workers((int) active_workers);
-      _parallel_workers->run_task(&task);
-    } else {
-      task.work(0);
-    }
+    _parallel_workers->set_active_workers(active_workers);
+    _parallel_workers->run_task(&task);
 
     // It's possible that has_aborted() is true here without actually
     // aborting the survivor scan earlier. This is OK as it's
@@ -1279,15 +1260,11 @@
   set_concurrency_and_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
-  if (use_parallel_marking_threads()) {
-    _parallel_workers->set_active_workers((int)active_workers);
-    // Don't set _n_par_threads because it affects MT in process_roots()
-    // and the decisions on that MT processing is made elsewhere.
-    assert(_parallel_workers->active_workers() > 0, "Should have been set");
-    _parallel_workers->run_task(&markingTask);
-  } else {
-    markingTask.work(0);
-  }
+  _parallel_workers->set_active_workers(active_workers);
+  // Don't set _n_par_threads because it affects MT in process_roots()
+  // and the decisions on that MT processing is made elsewhere.
+  assert(_parallel_workers->active_workers() > 0, "Should have been set");
+  _parallel_workers->run_task(&markingTask);
   print_stats();
 }
 
@@ -1714,11 +1691,7 @@
                                             _expected_card_bm,
                                             _verbose);
 
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
-    } else {
-      _g1h->heap_region_iterate(&verify_cl);
-    }
+    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
 
     Atomic::add(verify_cl.failures(), &_failures);
   }
@@ -1821,11 +1794,7 @@
                                                 _actual_region_bm,
                                                 _actual_card_bm);
 
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
-    } else {
-      _g1h->heap_region_iterate(&final_update_cl);
-    }
+    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
   }
 };
 
@@ -1922,11 +1891,7 @@
     HRRSCleanupTask hrrs_cleanup_task;
     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
                                            &hrrs_cleanup_task);
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
-    } else {
-      _g1h->heap_region_iterate(&g1_note_end);
-    }
+    _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
     assert(g1_note_end.complete(), "Shouldn't have yielded!");
 
     // Now update the lists
@@ -1977,11 +1942,7 @@
   }
 
   void work(uint worker_id) {
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1rs->scrub_par(_region_bm, _card_bm, worker_id, &_hrclaimer);
-    } else {
-      _g1rs->scrub(_region_bm, _card_bm);
-    }
+    _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
   }
 
 };
@@ -2020,18 +1981,13 @@
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
 
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    g1h->set_par_threads();
-    n_workers = g1h->n_par_threads();
-    assert(g1h->n_par_threads() == n_workers,
-           "Should not have been reset");
-    g1h->workers()->run_task(&g1_par_count_task);
-    // Done with the parallel phase so reset to 0.
-    g1h->set_par_threads(0);
-  } else {
-    n_workers = 1;
-    g1_par_count_task.work(0);
-  }
+  g1h->set_par_threads();
+  n_workers = g1h->n_par_threads();
+  assert(g1h->n_par_threads() == n_workers,
+         "Should not have been reset");
+  g1h->workers()->run_task(&g1_par_count_task);
+  // Done with the parallel phase so reset to 0.
+  g1h->set_par_threads(0);
 
   if (VerifyDuringGC) {
     // Verify that the counting data accumulated during marking matches
@@ -2047,14 +2003,10 @@
                                                  &expected_region_bm,
                                                  &expected_card_bm);
 
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      g1h->set_par_threads((int)n_workers);
-      g1h->workers()->run_task(&g1_par_verify_task);
-      // Done with the parallel phase so reset to 0.
-      g1h->set_par_threads(0);
-    } else {
-      g1_par_verify_task.work(0);
-    }
+    g1h->set_par_threads((int)n_workers);
+    g1h->workers()->run_task(&g1_par_verify_task);
+    // Done with the parallel phase so reset to 0.
+    g1h->set_par_threads(0);
 
     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
   }
@@ -2078,13 +2030,9 @@
 
   // Note end of marking in all heap regions.
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    g1h->set_par_threads((int)n_workers);
-    g1h->workers()->run_task(&g1_par_note_end_task);
-    g1h->set_par_threads(0);
-  } else {
-    g1_par_note_end_task.work(0);
-  }
+  g1h->set_par_threads((int)n_workers);
+  g1h->workers()->run_task(&g1_par_note_end_task);
+  g1h->set_par_threads(0);
   g1h->check_gc_time_stamps();
 
   if (!cleanup_list_is_empty()) {
@@ -2099,13 +2047,9 @@
   if (G1ScrubRemSets) {
     double rs_scrub_start = os::elapsedTime();
     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      g1h->set_par_threads((int)n_workers);
-      g1h->workers()->run_task(&g1_par_scrub_rs_task);
-      g1h->set_par_threads(0);
-    } else {
-      g1_par_scrub_rs_task.work(0);
-    }
+    g1h->set_par_threads((int)n_workers);
+    g1h->workers()->run_task(&g1_par_scrub_rs_task);
+    g1h->set_par_threads(0);
 
     double rs_scrub_end = os::elapsedTime();
     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
@@ -2502,7 +2446,7 @@
     // is not multi-threaded we use the current (VMThread) thread,
     // otherwise we use the work gang from the G1CollectedHeap and
     // we utilize all the worker threads we can.
-    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
+    bool processing_is_mt = rp->processing_is_mt();
     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
 
@@ -2564,17 +2508,27 @@
     G1CMTraceTime trace("Unloading", G1Log::finer());
 
     if (ClassUnloadingWithConcurrentMark) {
+      // Cleaning of klasses depends on correct information from MetadataOnStackMark. The CodeCache::mark_on_stack
+      // part is too slow to be done serially, so it is handled during the weakRefsWorkParallelPart phase.
+      // Defer the cleaning until we have complete on_stack data.
+      MetadataOnStackMark md_on_stack(false /* Don't visit the code cache at this point */);
+
       bool purged_classes;
 
       {
         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
-        purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
+        purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
       }
 
       {
         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
       }
+
+      {
+        G1CMTraceTime trace("Deallocate Metadata", G1Log::finest());
+        ClassLoaderDataGraph::free_deallocate_lists();
+      }
     }
 
     if (G1StringDedup::is_enabled()) {
@@ -2611,16 +2565,15 @@
   G1CMOopClosure _cm_cl;
   MarkingCodeBlobClosure _code_cl;
   int _thread_parity;
-  bool _is_par;
 
  public:
-  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
+  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
-    _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
+    _thread_parity(SharedHeap::heap()->strong_roots_parity()) {}
 
   void do_thread(Thread* thread) {
     if (thread->is_Java_thread()) {
-      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+      if (thread->claim_oops_do(true, _thread_parity)) {
         JavaThread* jt = (JavaThread*)thread;
 
         // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
@@ -2634,7 +2587,7 @@
         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
       }
     } else if (thread->is_VM_thread()) {
-      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+      if (thread->claim_oops_do(true, _thread_parity)) {
         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
       }
     }
@@ -2644,7 +2597,6 @@
 class CMRemarkTask: public AbstractGangTask {
 private:
   ConcurrentMark* _cm;
-  bool            _is_serial;
 public:
   void work(uint worker_id) {
     // Since all available tasks are actually started, we should
@@ -2656,14 +2608,14 @@
         ResourceMark rm;
         HandleMark hm;
 
-        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
+        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
         Threads::threads_do(&threads_f);
       }
 
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
                               true         /* do_termination       */,
-                              _is_serial);
+                              false        /* is_serial            */);
       } while (task->has_aborted() && !_cm->has_overflown());
       // If we overflow, then we do not want to restart. We instead
       // want to abort remark and do concurrent marking again.
@@ -2671,8 +2623,8 @@
     }
   }
 
-  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
-    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
+  CMRemarkTask(ConcurrentMark* cm, int active_workers) :
+    AbstractGangTask("Par Remark"), _cm(cm) {
     _cm->terminator()->reset_for_reuse(active_workers);
   }
 };
@@ -2686,43 +2638,28 @@
 
   g1h->ensure_parsability(false);
 
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    G1CollectedHeap::StrongRootsScope srs(g1h);
-    // this is remark, so we'll use up all active threads
-    uint active_workers = g1h->workers()->active_workers();
-    if (active_workers == 0) {
-      assert(active_workers > 0, "Should have been set earlier");
-      active_workers = (uint) ParallelGCThreads;
-      g1h->workers()->set_active_workers(active_workers);
-    }
-    set_concurrency_and_phase(active_workers, false /* concurrent */);
-    // Leave _parallel_marking_threads at it's
-    // value originally calculated in the ConcurrentMark
-    // constructor and pass values of the active workers
-    // through the gang in the task.
-
-    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
-    // We will start all available threads, even if we decide that the
-    // active_workers will be fewer. The extra ones will just bail out
-    // immediately.
-    g1h->set_par_threads(active_workers);
-    g1h->workers()->run_task(&remarkTask);
-    g1h->set_par_threads(0);
-  } else {
-    G1CollectedHeap::StrongRootsScope srs(g1h);
-    uint active_workers = 1;
-    set_concurrency_and_phase(active_workers, false /* concurrent */);
-
-    // Note - if there's no work gang then the VMThread will be
-    // the thread to execute the remark - serially. We have
-    // to pass true for the is_serial parameter so that
-    // CMTask::do_marking_step() doesn't enter the sync
-    // barriers in the event of an overflow. Doing so will
-    // cause an assert that the current thread is not a
-    // concurrent GC thread.
-    CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
-    remarkTask.work(0);
+  G1CollectedHeap::StrongRootsScope srs(g1h);
+  // this is remark, so we'll use up all active threads
+  uint active_workers = g1h->workers()->active_workers();
+  if (active_workers == 0) {
+    assert(active_workers > 0, "Should have been set earlier");
+    active_workers = (uint) ParallelGCThreads;
+    g1h->workers()->set_active_workers(active_workers);
   }
+  set_concurrency_and_phase(active_workers, false /* concurrent */);
+  // Leave _parallel_marking_threads at its
+  // value originally calculated in the ConcurrentMark
+  // constructor and pass values of the active workers
+  // through the gang in the task.
+
+  CMRemarkTask remarkTask(this, active_workers);
+  // We will start all available threads, even if we decide that the
+  // active_workers will be fewer. The extra ones will just bail out
+  // immediately.
+  g1h->set_par_threads(active_workers);
+  g1h->workers()->run_task(&remarkTask);
+  g1h->set_par_threads(0);
+
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   guarantee(has_overflown() ||
             satb_mq_set.completed_buffers_num() == 0,
@@ -3268,30 +3205,20 @@
   void work(uint worker_id) {
     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
 
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
-    } else {
-      _g1h->heap_region_iterate(&cl);
-    }
+    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
   }
 };
 
 
 void ConcurrentMark::aggregate_count_data() {
-  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                        _g1h->workers()->active_workers() :
-                        1);
+  int n_workers = _g1h->workers()->active_workers();
 
   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                            _max_worker_id, n_workers);
 
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    _g1h->set_par_threads(n_workers);
-    _g1h->workers()->run_task(&g1_par_agg_task);
-    _g1h->set_par_threads(0);
-  } else {
-    g1_par_agg_task.work(0);
-  }
+  _g1h->set_par_threads(n_workers);
+  _g1h->workers()->run_task(&g1_par_agg_task);
+  _g1h->set_par_threads(0);
   _g1h->allocation_context_stats().update_at_remark();
 }
 
@@ -3419,9 +3346,7 @@
 }
 
 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
-  if (use_parallel_marking_threads()) {
-    _parallel_workers->print_worker_threads_on(st);
-  }
+  _parallel_workers->print_worker_threads_on(st);
 }
 
 void ConcurrentMark::print_on_error(outputStream* st) const {
@@ -3942,32 +3867,17 @@
 
   CMObjectClosure oc(this);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    satb_mq_set.set_par_closure(_worker_id, &oc);
-  } else {
-    satb_mq_set.set_closure(&oc);
-  }
+  satb_mq_set.set_closure(_worker_id, &oc);
 
   // This keeps claiming and applying the closure to completed buffers
   // until we run out of buffers or we need to abort.
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    while (!has_aborted() &&
-           satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
-      if (_cm->verbose_medium()) {
-        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
-      }
-      statsOnly( ++_satb_buffers_processed );
-      regular_clock_call();
+  while (!has_aborted() &&
+         satb_mq_set.apply_closure_to_completed_buffer(_worker_id)) {
+    if (_cm->verbose_medium()) {
+      gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
     }
-  } else {
-    while (!has_aborted() &&
-           satb_mq_set.apply_closure_to_completed_buffer()) {
-      if (_cm->verbose_medium()) {
-        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
-      }
-      statsOnly( ++_satb_buffers_processed );
-      regular_clock_call();
-    }
+    statsOnly( ++_satb_buffers_processed );
+    regular_clock_call();
   }
 
   _draining_satb_buffers = false;
@@ -3976,11 +3886,7 @@
          concurrent() ||
          satb_mq_set.completed_buffers_num() == 0, "invariant");
 
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    satb_mq_set.set_par_closure(_worker_id, NULL);
-  } else {
-    satb_mq_set.set_closure(NULL);
-  }
+  satb_mq_set.set_closure(_worker_id, NULL);
 
   // again, this was a potentially expensive operation, decrease the
   // limits to get the regular clock call early
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.hpp
index 8364502..6e18c01 100644
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -519,15 +519,6 @@
   double cleanup_sleep_factor()             { return _cleanup_sleep_factor; }
   double cleanup_task_overhead()            { return _cleanup_task_overhead;}
 
-  bool use_parallel_marking_threads() const {
-    assert(parallel_marking_threads() <=
-           max_parallel_marking_threads(), "sanity");
-    assert((_parallel_workers == NULL && parallel_marking_threads() == 0) ||
-           parallel_marking_threads() > 0,
-           "parallel workers not set up correctly");
-    return _parallel_workers != NULL;
-  }
-
   HeapWord*               finger()          { return _finger;   }
   bool                    concurrent()      { return _concurrent; }
   uint                    active_tasks()    { return _active_tasks; }
diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
index aef1aa0..7ac2782 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
@@ -28,6 +28,7 @@
 #endif
 
 #include "precompiled.hpp"
+#include "classfile/metadataOnStackMark.hpp"
 #include "classfile/stringTable.hpp"
 #include "code/codeCache.hpp"
 #include "code/icBuffer.hpp"
@@ -1438,36 +1439,31 @@
       }
 
       // Rebuild remembered sets of all regions.
-      if (G1CollectedHeap::use_parallel_gc_threads()) {
-        uint n_workers =
-          AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
-                                                  workers()->active_workers(),
-                                                  Threads::number_of_non_daemon_threads());
-        assert(UseDynamicNumberOfGCThreads ||
-               n_workers == workers()->total_workers(),
-               "If not dynamic should be using all the  workers");
-        workers()->set_active_workers(n_workers);
-        // Set parallel threads in the heap (_n_par_threads) only
-        // before a parallel phase and always reset it to 0 after
-        // the phase so that the number of parallel threads does
-        // no get carried forward to a serial phase where there
-        // may be code that is "possibly_parallel".
-        set_par_threads(n_workers);
+      uint n_workers =
+        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                                workers()->active_workers(),
+                                                Threads::number_of_non_daemon_threads());
+      assert(UseDynamicNumberOfGCThreads ||
+             n_workers == workers()->total_workers(),
+             "If not dynamic should be using all the  workers");
+      workers()->set_active_workers(n_workers);
+      // Set parallel threads in the heap (_n_par_threads) only
+      // before a parallel phase and always reset it to 0 after
+      // the phase so that the number of parallel threads does
+      // not get carried forward to a serial phase where there
+      // may be code that is "possibly_parallel".
+      set_par_threads(n_workers);
 
-        ParRebuildRSTask rebuild_rs_task(this);
-        assert(UseDynamicNumberOfGCThreads ||
-               workers()->active_workers() == workers()->total_workers(),
-               "Unless dynamic should use total workers");
-        // Use the most recent number of  active workers
-        assert(workers()->active_workers() > 0,
-               "Active workers not properly set");
-        set_par_threads(workers()->active_workers());
-        workers()->run_task(&rebuild_rs_task);
-        set_par_threads(0);
-      } else {
-        RebuildRSOutOfRegionClosure rebuild_rs(this);
-        heap_region_iterate(&rebuild_rs);
-      }
+      ParRebuildRSTask rebuild_rs_task(this);
+      assert(UseDynamicNumberOfGCThreads ||
+             workers()->active_workers() == workers()->total_workers(),
+             "Unless dynamic should use total workers");
+      // Use the most recent number of active workers
+      assert(workers()->active_workers() > 0,
+             "Active workers not properly set");
+      set_par_threads(workers()->active_workers());
+      workers()->run_task(&rebuild_rs_task);
+      set_par_threads(0);
 
       // Rebuild the strong code root lists for each region
       rebuild_strong_code_roots();
@@ -2482,6 +2478,7 @@
 
   unsigned int gc_count_before;
   unsigned int old_marking_count_before;
+  unsigned int full_gc_count_before;
   bool retry_gc;
 
   do {
@@ -2492,6 +2489,7 @@
 
       // Read the GC count while holding the Heap_lock
       gc_count_before = total_collections();
+      full_gc_count_before = total_full_collections();
       old_marking_count_before = _old_marking_cycles_started;
     }
 
@@ -2536,7 +2534,7 @@
         VMThread::execute(&op);
       } else {
         // Schedule a Full GC.
-        VM_G1CollectFull op(gc_count_before, old_marking_count_before, cause);
+        VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
         VMThread::execute(&op);
       }
     }
@@ -2678,27 +2676,25 @@
   // Then thread t will start at region floor ((t * n) / p)
 
   result = g1_policy()->collection_set();
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    uint cs_size = g1_policy()->cset_region_length();
-    uint active_workers = workers()->active_workers();
-    assert(UseDynamicNumberOfGCThreads ||
-             active_workers == workers()->total_workers(),
-             "Unless dynamic should use total workers");
+  uint cs_size = g1_policy()->cset_region_length();
+  uint active_workers = workers()->active_workers();
+  assert(UseDynamicNumberOfGCThreads ||
+           active_workers == workers()->total_workers(),
+           "Unless dynamic should use total workers");
 
-    uint end_ind   = (cs_size * worker_i) / active_workers;
-    uint start_ind = 0;
+  uint end_ind   = (cs_size * worker_i) / active_workers;
+  uint start_ind = 0;
 
-    if (worker_i > 0 &&
-        _worker_cset_start_region_time_stamp[worker_i - 1] == gc_time_stamp) {
-      // Previous workers starting region is valid
-      // so let's iterate from there
-      start_ind = (cs_size * (worker_i - 1)) / active_workers;
-      result = _worker_cset_start_region[worker_i - 1];
-    }
+  if (worker_i > 0 &&
+      _worker_cset_start_region_time_stamp[worker_i - 1] == gc_time_stamp) {
+      // Previous worker's starting region is valid
+    // so let's iterate from there
+    start_ind = (cs_size * (worker_i - 1)) / active_workers;
+    result = _worker_cset_start_region[worker_i - 1];
+  }
 
-    for (uint i = start_ind; i < end_ind; i++) {
-      result = result->next_in_collection_set();
-    }
+  for (uint i = start_ind; i < end_ind; i++) {
+    result = result->next_in_collection_set();
   }
 
   // Note: the calculated starting heap region may be NULL
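  As a worked example of the slicing above (numbers are illustrative, not from the patch):
  with cs_size = 10 collection-set regions and active_workers = 4, the starting index for
  worker t is floor(t * 10 / 4), i.e. worker 0 starts at region 0, worker 1 at 2, worker 2
  at 5 and worker 3 at 7. Each worker then advances result via next_in_collection_set()
  only over its own slice, and the cached _worker_cset_start_region of the previous worker
  merely shortens that walk when its time stamp matches the current GC.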
@@ -3373,9 +3369,7 @@
 }
 
 void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    workers()->print_worker_threads_on(st);
-  }
+  workers()->print_worker_threads_on(st);
   _cmThread->print_on(st);
   st->cr();
   _cm->print_worker_threads_on(st);
@@ -3386,9 +3380,7 @@
 }
 
 void G1CollectedHeap::gc_threads_do(ThreadClosure* tc) const {
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    workers()->threads_do(tc);
-  }
+  workers()->threads_do(tc);
   tc->do_thread(_cmThread);
   _cg1r->threads_do(tc);
   if (G1StringDedup::is_enabled()) {
@@ -3683,7 +3675,7 @@
   print_taskqueue_stats_hdr(st);
 
   TaskQueueStats totals;
-  const int n = workers() != NULL ? workers()->total_workers() : 1;
+  const int n = workers()->total_workers();
   for (int i = 0; i < n; ++i) {
     st->print("%3d ", i); task_queue(i)->stats.print(st); st->cr();
     totals += task_queue(i)->stats;
@@ -3694,7 +3686,7 @@
 }
 
 void G1CollectedHeap::reset_taskqueue_stats() {
-  const int n = workers() != NULL ? workers()->total_workers() : 1;
+  const int n = workers()->total_workers();
   for (int i = 0; i < n; ++i) {
     task_queue(i)->stats.reset();
   }
@@ -3792,8 +3784,7 @@
 
     TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
 
-    int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                                workers()->active_workers() : 1);
+    int active_workers = workers()->active_workers();
     double pause_start_sec = os::elapsedTime();
     g1_policy()->phase_times()->note_gc_start(active_workers);
     log_gc_header();
@@ -4787,12 +4778,10 @@
   int _symbols_processed;
   int _symbols_removed;
 
-  bool _do_in_parallel;
 public:
   G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) :
     AbstractGangTask("String/Symbol Unlinking"),
     _is_alive(is_alive),
-    _do_in_parallel(G1CollectedHeap::use_parallel_gc_threads()),
     _process_strings(process_strings), _strings_processed(0), _strings_removed(0),
     _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) {
 
@@ -4807,10 +4796,10 @@
   }
 
   ~G1StringSymbolTableUnlinkTask() {
-    guarantee(!_process_strings || !_do_in_parallel || StringTable::parallel_claimed_index() >= _initial_string_table_size,
+    guarantee(!_process_strings || StringTable::parallel_claimed_index() >= _initial_string_table_size,
               err_msg("claim value %d after unlink less than initial string table size %d",
                       StringTable::parallel_claimed_index(), _initial_string_table_size));
-    guarantee(!_process_symbols || !_do_in_parallel || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size,
+    guarantee(!_process_symbols || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size,
               err_msg("claim value %d after unlink less than initial symbol table size %d",
                       SymbolTable::parallel_claimed_index(), _initial_symbol_table_size));
 
@@ -4824,28 +4813,19 @@
   }
 
   void work(uint worker_id) {
-    if (_do_in_parallel) {
-      int strings_processed = 0;
-      int strings_removed = 0;
-      int symbols_processed = 0;
-      int symbols_removed = 0;
-      if (_process_strings) {
-        StringTable::possibly_parallel_unlink(_is_alive, &strings_processed, &strings_removed);
-        Atomic::add(strings_processed, &_strings_processed);
-        Atomic::add(strings_removed, &_strings_removed);
-      }
-      if (_process_symbols) {
-        SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed);
-        Atomic::add(symbols_processed, &_symbols_processed);
-        Atomic::add(symbols_removed, &_symbols_removed);
-      }
-    } else {
-      if (_process_strings) {
-        StringTable::unlink(_is_alive, &_strings_processed, &_strings_removed);
-      }
-      if (_process_symbols) {
-        SymbolTable::unlink(&_symbols_processed, &_symbols_removed);
-      }
+    int strings_processed = 0;
+    int strings_removed = 0;
+    int symbols_processed = 0;
+    int symbols_removed = 0;
+    if (_process_strings) {
+      StringTable::possibly_parallel_unlink(_is_alive, &strings_processed, &strings_removed);
+      Atomic::add(strings_processed, &_strings_processed);
+      Atomic::add(strings_removed, &_strings_removed);
+    }
+    if (_process_symbols) {
+      SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed);
+      Atomic::add(symbols_processed, &_symbols_processed);
+      Atomic::add(symbols_removed, &_symbols_removed);
     }
   }
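  The unified work() above keeps the usual reduction idiom: each worker accumulates into
  locals and publishes once via Atomic::add, so the shared counters see one update per
  worker rather than one per scanned entry. A minimal sketch of that idiom, with
  hypothetical names (only the Atomic::add call mirrors the code above):

    // Hypothetical per-worker reduction: count locally, publish once at the end.
    static volatile int _total_processed = 0;

    void count_entries_for_worker(int entries_for_this_worker) {
      int processed = 0;
      for (int i = 0; i < entries_for_this_worker; i++) {
        processed++;                             // thread-local, uncontended
      }
      Atomic::add(processed, &_total_processed); // single contended update per worker
    }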
 
@@ -5013,6 +4993,10 @@
         clean_nmethod(claimed_nmethods[i]);
       }
     }
+
+    // The nmethod cleaning helps out and does the CodeCache part of MetadataOnStackMark.
+    // Need to retire the buffers now that this thread has stopped cleaning nmethods.
+    MetadataOnStackMark::retire_buffer_for_thread(Thread::current());
   }
 
   void work_second_pass(uint worker_id) {
@@ -5065,6 +5049,9 @@
     // G1 specific cleanup work that has
     // been moved here to be done in parallel.
     ik->clean_dependent_nmethods();
+    if (JvmtiExport::has_redefined_a_class()) {
+      InstanceKlass::purge_previous_versions(ik);
+    }
   }
 
   void work() {
@@ -5099,8 +5086,18 @@
       _klass_cleaning_task(is_alive) {
   }
 
+  void pre_work_verification() {
+    assert(!MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "Should be empty");
+  }
+
+  void post_work_verification() {
+    assert(!MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "Should be empty");
+  }
+
   // The parallel work done by all worker threads.
   void work(uint worker_id) {
+    pre_work_verification();
+
     // Do first pass of code cache cleaning.
     _code_cache_task.work_first_pass(worker_id);
 
@@ -5119,6 +5116,8 @@
 
     // Clean all klasses that were not unloaded.
     _klass_cleaning_task.work();
+
+    post_work_verification();
   }
 };
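  The pre_work_verification/post_work_verification pair above brackets the per-worker
  MetadataOnStackMark buffer lifecycle: the first code cache pass records on-stack metadata
  into a thread-local buffer and must retire it before the worker moves on. A sketch of the
  flow these hunks imply, using only calls that appear in this patch (example_worker is a
  placeholder name):

    void example_worker(uint worker_id) {
      assert(!MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "empty on entry");
      // ... first pass: clean nmethods, recording on-stack Metadata* into the
      //     thread-local MetadataOnStackBuffer ...
      MetadataOnStackMark::retire_buffer_for_thread(Thread::current()); // hand the buffer back
      assert(!MetadataOnStackMark::has_buffer_for_thread(Thread::current()), "empty on exit");
    }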
 
@@ -5127,33 +5126,23 @@
                                         bool process_strings,
                                         bool process_symbols,
                                         bool class_unloading_occurred) {
-  uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                    workers()->active_workers() : 1);
+  uint n_workers = workers()->active_workers();
 
   G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols,
                                         n_workers, class_unloading_occurred);
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    set_par_threads(n_workers);
-    workers()->run_task(&g1_unlink_task);
-    set_par_threads(0);
-  } else {
-    g1_unlink_task.work(0);
-  }
+  set_par_threads(n_workers);
+  workers()->run_task(&g1_unlink_task);
+  set_par_threads(0);
 }
 
 void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
                                                      bool process_strings, bool process_symbols) {
   {
-    uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                     _g1h->workers()->active_workers() : 1);
+    uint n_workers = _g1h->workers()->active_workers();
     G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      set_par_threads(n_workers);
-      workers()->run_task(&g1_unlink_task);
-      set_par_threads(0);
-    } else {
-      g1_unlink_task.work(0);
-    }
+    set_par_threads(n_workers);
+    workers()->run_task(&g1_unlink_task);
+    set_par_threads(0);
   }
 
   if (G1StringDedup::is_enabled()) {
@@ -5171,11 +5160,7 @@
     double start_time = os::elapsedTime();
 
     RedirtyLoggedCardTableEntryClosure cl;
-    if (G1CollectedHeap::heap()->use_parallel_gc_threads()) {
-      _queue->par_apply_closure_to_all_completed_buffers(&cl);
-    } else {
-      _queue->apply_closure_to_all_completed_buffers(&cl);
-    }
+    _queue->par_apply_closure_to_all_completed_buffers(&cl);
 
     G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
     timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0);
@@ -5186,18 +5171,13 @@
 void G1CollectedHeap::redirty_logged_cards() {
   double redirty_logged_cards_start = os::elapsedTime();
 
-  uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                   _g1h->workers()->active_workers() : 1);
+  uint n_workers = _g1h->workers()->active_workers();
 
   G1RedirtyLoggedCardsTask redirty_task(&dirty_card_queue_set());
   dirty_card_queue_set().reset_for_par_iteration();
-  if (use_parallel_gc_threads()) {
-    set_par_threads(n_workers);
-    workers()->run_task(&redirty_task);
-    set_par_threads(0);
-  } else {
-    redirty_task.work(0);
-  }
+  set_par_threads(n_workers);
+  workers()->run_task(&redirty_task);
+  set_par_threads(0);
 
   DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
   dcq.merge_bufferlists(&dirty_card_queue_set());
@@ -5587,20 +5567,14 @@
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.
 
-  assert(!G1CollectedHeap::use_parallel_gc_threads() ||
-           no_of_gc_workers == workers()->active_workers(),
-           "Need to reset active GC workers");
+  assert(no_of_gc_workers == workers()->active_workers(), "Need to reset active GC workers");
 
   set_par_threads(no_of_gc_workers);
   G1ParPreserveCMReferentsTask keep_cm_referents(this,
                                                  no_of_gc_workers,
                                                  _task_queues);
 
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    workers()->run_task(&keep_cm_referents);
-  } else {
-    keep_cm_referents.work(0);
-  }
+  workers()->run_task(&keep_cm_referents);
 
   set_par_threads(0);
 
@@ -5727,21 +5701,15 @@
   hot_card_cache->set_use_cache(false);
 
   uint n_workers;
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    n_workers =
-      AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
-                                     workers()->active_workers(),
-                                     Threads::number_of_non_daemon_threads());
-    assert(UseDynamicNumberOfGCThreads ||
-           n_workers == workers()->total_workers(),
-           "If not dynamic should be using all the  workers");
-    workers()->set_active_workers(n_workers);
-    set_par_threads(n_workers);
-  } else {
-    assert(n_par_threads() == 0,
-           "Should be the original non-parallel value");
-    n_workers = 1;
-  }
+  n_workers =
+    AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
+                                   workers()->active_workers(),
+                                   Threads::number_of_non_daemon_threads());
+  assert(UseDynamicNumberOfGCThreads ||
+         n_workers == workers()->total_workers(),
+         "If not dynamic should be using all the workers");
+  workers()->set_active_workers(n_workers);
+  set_par_threads(n_workers);
 
   G1ParTask g1_par_task(this, _task_queues);
 
@@ -5760,18 +5728,13 @@
       ClassLoaderDataGraph::clear_claimed_marks();
     }
 
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      // The individual threads will set their evac-failure closures.
-      if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
-      // These tasks use ShareHeap::_process_strong_tasks
-      assert(UseDynamicNumberOfGCThreads ||
-             workers()->active_workers() == workers()->total_workers(),
-             "If not dynamic should be using all the  workers");
-      workers()->run_task(&g1_par_task);
-    } else {
-      g1_par_task.set_for_termination(n_workers);
-      g1_par_task.work(0);
-    }
+    // The individual threads will set their evac-failure closures.
+    if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
+    // These tasks use SharedHeap::_process_strong_tasks
+    assert(UseDynamicNumberOfGCThreads ||
+           workers()->active_workers() == workers()->total_workers(),
+           "If not dynamic should be using all the workers");
+    workers()->run_task(&g1_par_task);
     end_par_time_sec = os::elapsedTime();
 
     // Closing the inner scope will execute the destructor
@@ -6077,22 +6040,9 @@
     // Iterate over the dirty cards region list.
     G1ParCleanupCTTask cleanup_task(ct_bs, this);
 
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      set_par_threads();
-      workers()->run_task(&cleanup_task);
-      set_par_threads(0);
-    } else {
-      while (_dirty_cards_region_list) {
-        HeapRegion* r = _dirty_cards_region_list;
-        cleanup_task.clear_cards(r);
-        _dirty_cards_region_list = r->get_next_dirty_cards_region();
-        if (_dirty_cards_region_list == r) {
-          // The last region.
-          _dirty_cards_region_list = NULL;
-        }
-        r->set_next_dirty_cards_region(NULL);
-      }
-    }
+    set_par_threads();
+    workers()->run_task(&cleanup_task);
+    set_par_threads(0);
 #ifndef PRODUCT
     if (G1VerifyCTCleanup || VerifyAfterGC) {
       G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
@@ -6632,7 +6582,6 @@
 void G1CollectedHeap::set_par_threads() {
   // Don't change the number of workers.  Use the value previously set
   // in the workgroup.
-  assert(G1CollectedHeap::use_parallel_gc_threads(), "shouldn't be here otherwise");
   uint n_workers = workers()->active_workers();
   assert(UseDynamicNumberOfGCThreads ||
            n_workers == workers()->total_workers(),
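  Throughout g1CollectedHeap.cpp this patch collapses the serial/parallel branches into the
  single parallel path; the recurring shape, summarized here only to highlight the pattern
  (this note adds no code beyond what the hunks above already show), is:

    set_par_threads(workers()->active_workers());  // announce the worker count to the heap
    workers()->run_task(&task);                    // run the gang task on all active workers
    set_par_threads(0);                            // reset so later serial phases see zero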
diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
index 1cf1b17..4b193af 100644
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
@@ -84,8 +84,7 @@
 };
 
 G1CollectorPolicy::G1CollectorPolicy() :
-  _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
-                        ? ParallelGCThreads : 1),
+  _parallel_gc_threads(ParallelGCThreads),
 
   _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _stop_world_start(0.0),
@@ -1544,32 +1543,6 @@
   }
 }
 
-class KnownGarbageClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  CollectionSetChooser* _hrSorted;
-
-public:
-  KnownGarbageClosure(CollectionSetChooser* hrSorted) :
-    _g1h(G1CollectedHeap::heap()), _hrSorted(hrSorted) { }
-
-  bool doHeapRegion(HeapRegion* r) {
-    // We only include humongous regions in collection
-    // sets when concurrent mark shows that their contained object is
-    // unreachable.
-
-    // Do we have any marking information for this region?
-    if (r->is_marked()) {
-      // We will skip any region that's currently used as an old GC
-      // alloc region (we should not consider those for collection
-      // before we fill them up).
-      if (_hrSorted->should_add(r) && !_g1h->is_old_gc_alloc_region(r)) {
-        _hrSorted->add_region(r);
-      }
-    }
-    return false;
-  }
-};
-
 class ParKnownGarbageHRClosure: public HeapRegionClosure {
   G1CollectedHeap* _g1h;
   CSetChooserParUpdater _cset_updater;
@@ -1617,34 +1590,29 @@
   _collectionSetChooser->clear();
 
   uint region_num = _g1->num_regions();
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    const uint OverpartitionFactor = 4;
-    uint WorkUnit;
-    // The use of MinChunkSize = 8 in the original code
-    // causes some assertion failures when the total number of
-    // region is less than 8.  The code here tries to fix that.
-    // Should the original code also be fixed?
-    if (no_of_gc_threads > 0) {
-      const uint MinWorkUnit = MAX2(region_num / no_of_gc_threads, 1U);
-      WorkUnit = MAX2(region_num / (no_of_gc_threads * OverpartitionFactor),
-                      MinWorkUnit);
-    } else {
-      assert(no_of_gc_threads > 0,
-        "The active gc workers should be greater than 0");
-      // In a product build do something reasonable to avoid a crash.
-      const uint MinWorkUnit = MAX2(region_num / (uint) ParallelGCThreads, 1U);
-      WorkUnit =
-        MAX2(region_num / (uint) (ParallelGCThreads * OverpartitionFactor),
-             MinWorkUnit);
-    }
-    _collectionSetChooser->prepare_for_par_region_addition(_g1->num_regions(),
-                                                           WorkUnit);
-    ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, WorkUnit, (uint) no_of_gc_threads);
-    _g1->workers()->run_task(&parKnownGarbageTask);
+  const uint OverpartitionFactor = 4;
+  uint WorkUnit;
+  // The use of MinChunkSize = 8 in the original code
+  // causes some assertion failures when the total number of
+  // regions is less than 8.  The code here tries to fix that.
+  // Should the original code also be fixed?
+  if (no_of_gc_threads > 0) {
+    const uint MinWorkUnit = MAX2(region_num / no_of_gc_threads, 1U);
+    WorkUnit = MAX2(region_num / (no_of_gc_threads * OverpartitionFactor),
+                    MinWorkUnit);
   } else {
-    KnownGarbageClosure knownGarbagecl(_collectionSetChooser);
-    _g1->heap_region_iterate(&knownGarbagecl);
+    assert(no_of_gc_threads > 0,
+      "The active gc workers should be greater than 0");
+    // In a product build do something reasonable to avoid a crash.
+    const uint MinWorkUnit = MAX2(region_num / (uint) ParallelGCThreads, 1U);
+    WorkUnit =
+      MAX2(region_num / (uint) (ParallelGCThreads * OverpartitionFactor),
+           MinWorkUnit);
   }
+  _collectionSetChooser->prepare_for_par_region_addition(_g1->num_regions(),
+                                                         WorkUnit);
+  ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, WorkUnit, (uint) no_of_gc_threads);
+  _g1->workers()->run_task(&parKnownGarbageTask);
 
   _collectionSetChooser->sort_regions();
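  To make the WorkUnit arithmetic above concrete (illustrative numbers only): with
  region_num = 2048 and no_of_gc_threads = 8, MinWorkUnit = MAX2(2048 / 8, 1U) = 256 and
  WorkUnit = MAX2(2048 / (8 * 4), 256) = MAX2(64, 256) = 256 regions per claimed chunk.
  Since region_num / no_of_gc_threads is never smaller than
  region_num / (no_of_gc_threads * OverpartitionFactor), the MinWorkUnit term always wins
  with these formulas; that behaviour is unchanged from the code being re-indented here.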
 
diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
index 13d1c9d..5db2c66 100644
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
@@ -292,36 +292,25 @@
   if (_root_region_scan_wait_time_ms > 0.0) {
     print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
   }
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
-    _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
-    _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
-    }
-    _last_update_rs_times_ms.print(2, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(3, "Processed Buffers");
-    _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
-    _last_termination_times_ms.print(2, "Termination (ms)");
-    if (G1Log::finest()) {
-      _last_termination_attempts.print(3, "Termination Attempts");
-    }
-    _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
-    _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
-    _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
-  } else {
-    _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
-    if (_last_satb_filtering_times_ms.sum() > 0.0) {
-      _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
-    }
-    _last_update_rs_times_ms.print(1, "Update RS (ms)");
-      _last_update_rs_processed_buffers.print(2, "Processed Buffers");
-    _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
-    _last_strong_code_root_scan_times_ms.print(1, "Code Root Scanning (ms)");
-    _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
+  print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
+  _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
+  _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
+  if (_last_satb_filtering_times_ms.sum() > 0.0) {
+    _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
   }
+  _last_update_rs_times_ms.print(2, "Update RS (ms)");
+    _last_update_rs_processed_buffers.print(3, "Processed Buffers");
+  _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
+  _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
+  _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
+  _last_termination_times_ms.print(2, "Termination (ms)");
+  if (G1Log::finest()) {
+    _last_termination_attempts.print(3, "Termination Attempts");
+  }
+  _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
+  _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
+  _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
+
   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
   print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
   if (G1StringDedup::is_enabled()) {
diff --git a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp
index 72427cd..9d5b382 100644
--- a/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp
+++ b/src/share/vm/gc_implementation/g1/g1HotCardCache.cpp
@@ -43,7 +43,7 @@
     _hot_cache_idx = 0;
 
     // For refining the cards in the hot cache in parallel
-    _hot_cache_par_chunk_size = (ParallelGCThreads > 0 ? ClaimChunkSize : _hot_cache_size);
+    _hot_cache_par_chunk_size = ClaimChunkSize;
     _hot_cache_par_claimed_idx = 0;
 
     _card_counts.initialize(card_counts_storage);
@@ -119,7 +119,7 @@
             // RSet updating while within an evacuation pause.
             // In this case worker_i should be the id of a GC worker thread
             assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint");
-            assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads),
+            assert(worker_i < ParallelGCThreads,
                    err_msg("incorrect worker id: %u", worker_i));
 
             into_cset_dcq->enqueue(card_ptr);
diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp
index a24b5a8..833404b 100644
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp
@@ -80,7 +80,6 @@
     _prev_period_summary()
 {
   _seq_task = new SubTasksDone(NumSeqTasks);
-  guarantee(n_workers() > 0, "There should be some workers");
   _cset_rs_update_cl = NEW_C_HEAP_ARRAY(OopsInHeapRegionClosure*, n_workers(), mtGC);
   for (uint i = 0; i < n_workers(); i++) {
     _cset_rs_update_cl[i] = NULL;
@@ -282,7 +281,7 @@
     // is during RSet updating within an evacuation pause.
     // In this case worker_i should be the id of a GC worker thread.
     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
-    assert(worker_i < (ParallelGCThreads == 0 ? 1 : ParallelGCThreads), "should be a GC worker");
+    assert(worker_i < ParallelGCThreads, "should be a GC worker");
 
     if (_g1rs->refine_card(card_ptr, worker_i, true)) {
       // 'card_ptr' contains references that point into the collection
@@ -343,8 +342,6 @@
   // DirtyCardQueueSet that is used to manage RSet updates
   DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
 
-  assert((ParallelGCThreads > 0) || worker_i == 0, "invariant");
-
   updateRS(&into_cset_dcq, worker_i);
   scanRS(oc, code_root_cl, worker_i);
 
@@ -420,12 +417,7 @@
   }
 };
 
-void G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm) {
-  ScrubRSClosure scrub_cl(region_bm, card_bm);
-  _g1->heap_region_iterate(&scrub_cl);
-}
-
-void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, uint worker_num, HeapRegionClaimer *hrclaimer) {
+void G1RemSet::scrub(BitMap* region_bm, BitMap* card_bm, uint worker_num, HeapRegionClaimer *hrclaimer) {
   ScrubRSClosure scrub_cl(region_bm, card_bm);
   _g1->heap_region_par_iterate(&scrub_cl, worker_num, hrclaimer);
 }
diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.hpp
index 73c7917..126703e 100644
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp
@@ -124,14 +124,10 @@
   // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
   // or card, respectively, such that a region or card with a corresponding
   // 0 bit contains no part of any live object.  Eliminates any remembered
-  // set entries that correspond to dead heap ranges.
-  void scrub(BitMap* region_bm, BitMap* card_bm);
-
-  // Like the above, but assumes is called in parallel: "worker_num" is the
-  // parallel thread id of the current thread, and "hrclaimer" is the shared
-  // HeapRegionClaimer that should be used to claim heap regions.
-  void scrub_par(BitMap* region_bm, BitMap* card_bm,
-                 uint worker_num, HeapRegionClaimer* hrclaimer);
+  // set entries that correspond to dead heap ranges. "worker_num" is the
+  // parallel thread id of the current thread, and "hrclaimer" is the
+  // HeapRegionClaimer that should be used.
+  void scrub(BitMap* region_bm, BitMap* card_bm, uint worker_num, HeapRegionClaimer* hrclaimer);
 
   // Refine the card corresponding to "card_ptr".
   // If check_for_refs_into_cset is true, a true result is returned
diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
index 2112b3f..523817d 100644
--- a/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp
@@ -31,11 +31,7 @@
 #include "oops/oop.inline.hpp"
 
 inline uint G1RemSet::n_workers() {
-  if (_g1->workers() != NULL) {
-    return _g1->workers()->total_workers();
-  } else {
-    return 1;
-  }
+  return _g1->workers()->total_workers();
 }
 
 template <class T>
diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
index 4bf6365..362404f 100644
--- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
+++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
@@ -154,14 +154,10 @@
   double fixup_start = os::elapsedTime();
 
   G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    G1CollectedHeap* g1h = G1CollectedHeap::heap();
-    g1h->set_par_threads();
-    g1h->workers()->run_task(&task);
-    g1h->set_par_threads(0);
-  } else {
-    task.work(0);
-  }
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  g1h->set_par_threads();
+  g1h->workers()->run_task(&task);
+  g1h->set_par_threads(0);
 
   double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
   g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
diff --git a/src/share/vm/gc_implementation/g1/satbQueue.cpp b/src/share/vm/gc_implementation/g1/satbQueue.cpp
index 5831650..cc9cc2d 100644
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp
@@ -202,7 +202,7 @@
 #endif // _MSC_VER
 
 SATBMarkQueueSet::SATBMarkQueueSet() :
-  PtrQueueSet(), _closure(NULL), _par_closures(NULL),
+  PtrQueueSet(), _closures(NULL),
   _shared_satb_queue(this, true /*perm*/) { }
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
@@ -210,9 +210,7 @@
                                   Mutex* lock) {
   PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1);
   _shared_satb_queue.set_lock(lock);
-  if (ParallelGCThreads > 0) {
-    _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads, mtGC);
-  }
+  _closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads, mtGC);
 }
 
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
@@ -276,17 +274,13 @@
   shared_satb_queue()->filter();
 }
 
-void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
-  _closure = closure;
+void SATBMarkQueueSet::set_closure(uint worker, ObjectClosure* closure) {
+  assert(_closures != NULL, "Precondition");
+  assert(worker < ParallelGCThreads, "Worker index must be in range [0...ParallelGCThreads)");
+  _closures[worker] = closure;
 }
 
-void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) {
-  assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition");
-  _par_closures[i] = par_closure;
-}
-
-bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
-                                                              uint worker) {
+bool SATBMarkQueueSet::apply_closure_to_completed_buffer(uint worker) {
   BufferNode* nd = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
@@ -298,7 +292,7 @@
       if (_n_completed_buffers == 0) _process_completed = false;
     }
   }
-  ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
+  ObjectClosure* cl = _closures[worker];
   if (nd != NULL) {
     void **buf = BufferNode::make_buffer_from_node(nd);
     ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
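  With the interface change above, a consumer registers one closure per worker and drains
  completed buffers for its own slot. A minimal sketch, assuming the usual
  JavaThread::satb_mark_queue_set() accessor; my_closure and worker_id are placeholders,
  and only set_closure and apply_closure_to_completed_buffer come from this hunk:

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    satb_mq_set.set_closure(worker_id, &my_closure);   // one closure slot per GC worker
    while (satb_mq_set.apply_closure_to_completed_buffer(worker_id)) {
      // keep draining until no completed buffers remain
    }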
diff --git a/src/share/vm/gc_implementation/g1/satbQueue.hpp b/src/share/vm/gc_implementation/g1/satbQueue.hpp
index 4da5447..a295ebc 100644
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp
@@ -78,16 +78,10 @@
 };
 
 class SATBMarkQueueSet: public PtrQueueSet {
-  ObjectClosure* _closure;
-  ObjectClosure** _par_closures;  // One per ParGCThread.
+  ObjectClosure** _closures;  // One per ParGCThread.
 
   ObjPtrQueue _shared_satb_queue;
 
-  // Utility function to support sequential and parallel versions.  If
-  // "par" is true, then "worker" is the par thread id; if "false", worker
-  // is ignored.
-  bool apply_closure_to_completed_buffer_work(bool par, uint worker);
-
 #ifdef ASSERT
   void dump_active_states(bool expected_active);
   void verify_active_states(bool expected_active);
@@ -111,26 +105,16 @@
   // Filter all the currently-active SATB buffers.
   void filter_thread_buffers();
 
-  // Register "blk" as "the closure" for all queues.  Only one such closure
-  // is allowed.  The "apply_closure_to_completed_buffer" method will apply
-  // this closure to a completed buffer, and "iterate_closure_all_threads"
+  // Register the closure for the given worker thread. The "apply_closure_to_completed_buffer"
+  // method will apply this closure to a completed buffer, and "iterate_closure_all_threads"
   // applies it to partially-filled buffers (the latter should only be done
   // with the world stopped).
-  void set_closure(ObjectClosure* closure);
-  // Set the parallel closures: pointer is an array of pointers to
-  // closures, one for each parallel GC thread.
-  void set_par_closure(int i, ObjectClosure* closure);
+  void set_closure(uint worker, ObjectClosure* closure);
 
   // If there exists some completed buffer, pop it, then apply the
   // registered closure to all its elements, and return true.  If no
   // completed buffers exist, return false.
-  bool apply_closure_to_completed_buffer() {
-    return apply_closure_to_completed_buffer_work(false, 0);
-  }
-  // Parallel version of the above.
-  bool par_apply_closure_to_completed_buffer(uint worker) {
-    return apply_closure_to_completed_buffer_work(true, worker);
-  }
+  bool apply_closure_to_completed_buffer(uint worker);
 
   // Apply the given closure on enqueued and currently-active buffers
   // respectively. Both methods are read-only, i.e., they do not
diff --git a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
index 37ce303..c8014d4 100644
--- a/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
+++ b/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp
@@ -60,7 +60,7 @@
   VM_G1CollectFull(unsigned int gc_count_before,
                    unsigned int full_gc_count_before,
                    GCCause::Cause cause)
-    : VM_GC_Operation(gc_count_before, cause, full_gc_count_before) { }
+    : VM_GC_Operation(gc_count_before, cause, full_gc_count_before, true) { }
   virtual VMOp_Type type() const { return VMOp_G1CollectFull; }
   virtual void doit();
   virtual const char* name() const {
diff --git a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
index e8b218e..a923cf5 100644
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp
@@ -479,7 +479,7 @@
     _next_gen.par_oop_since_save_marks_iterate_done(i);
   }
 
-  if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
+  if (UseConcMarkSweepGC) {
     // We need to call this even when ResizeOldPLAB is disabled
     // so as to avoid breaking some asserts. While we may be able
     // to avoid this by reorganizing the code a bit, I am loathe
diff --git a/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp b/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp
index e5defc9..29fb547 100644
--- a/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp
+++ b/src/share/vm/gc_implementation/parallelScavenge/generationSizer.cpp
@@ -47,11 +47,6 @@
   // Do basic sizing work
   GenCollectorPolicy::initialize_flags();
 
-  assert(UseSerialGC ||
-          !FLAG_IS_DEFAULT(ParallelGCThreads) ||
-          (ParallelGCThreads > 0),
-         "ParallelGCThreads should be set before flag initialization");
-
   // The survivor ratio's are calculated "raw", unlike the
   // default gc, which adds 2 to the ratio value. We need to
   // make sure the values are valid before using them.
diff --git a/src/share/vm/memory/freeBlockDictionary.cpp b/src/share/vm/memory/freeBlockDictionary.cpp
index 915ec20..1db7dcf 100644
--- a/src/share/vm/memory/freeBlockDictionary.cpp
+++ b/src/share/vm/memory/freeBlockDictionary.cpp
@@ -43,12 +43,10 @@
 
 template <class Chunk> void FreeBlockDictionary<Chunk>::verify_par_locked() const {
 #ifdef ASSERT
-  if (ParallelGCThreads > 0) {
-    Thread* my_thread = Thread::current();
-    if (my_thread->is_GC_task_thread()) {
-      assert(par_lock() != NULL, "Should be using locking?");
-      assert_lock_strong(par_lock());
-    }
+  Thread* my_thread = Thread::current();
+  if (my_thread->is_GC_task_thread()) {
+    assert(par_lock() != NULL, "Should be using locking?");
+    assert_lock_strong(par_lock());
   }
 #endif // ASSERT
 }
diff --git a/src/share/vm/memory/freeList.cpp b/src/share/vm/memory/freeList.cpp
index 3ab6f09..b1d901b 100644
--- a/src/share/vm/memory/freeList.cpp
+++ b/src/share/vm/memory/freeList.cpp
@@ -287,11 +287,14 @@
   return false;
 }
 
-#ifndef PRODUCT
+#ifdef ASSERT
 template <class Chunk>
 void FreeList<Chunk>::assert_proper_lock_protection_work() const {
-  assert(protecting_lock() != NULL, "Don't call this directly");
-  assert(ParallelGCThreads > 0, "Don't call this directly");
+  // Nothing to do if the list has no assigned protecting lock
+  if (protecting_lock() == NULL) {
+    return;
+  }
+
   Thread* thr = Thread::current();
   if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
     // assert that we are holding the freelist lock
diff --git a/src/share/vm/memory/freeList.hpp b/src/share/vm/memory/freeList.hpp
index e971471..80d0711 100644
--- a/src/share/vm/memory/freeList.hpp
+++ b/src/share/vm/memory/freeList.hpp
@@ -56,15 +56,12 @@
 
 #ifdef ASSERT
   Mutex*        _protecting_lock;
+  void assert_proper_lock_protection_work() const;
 #endif
 
   // Asserts false if the protecting lock (if any) is not held.
-  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
   void assert_proper_lock_protection() const {
-#ifdef ASSERT
-    if (_protecting_lock != NULL)
-      assert_proper_lock_protection_work();
-#endif
+    DEBUG_ONLY(assert_proper_lock_protection_work());
   }
 
   void increment_count()    {
diff --git a/src/share/vm/memory/sharedHeap.cpp b/src/share/vm/memory/sharedHeap.cpp
index 5d268c3..7d7648e 100644
--- a/src/share/vm/memory/sharedHeap.cpp
+++ b/src/share/vm/memory/sharedHeap.cpp
@@ -68,11 +68,9 @@
     vm_exit_during_initialization("Failed necessary allocation.");
   }
   _sh = this;  // ch is static, should be set only once.
-  if ((UseParNewGC ||
-      (UseConcMarkSweepGC && (CMSParallelInitialMarkEnabled ||
-                              CMSParallelRemarkEnabled)) ||
-       UseG1GC) &&
-      ParallelGCThreads > 0) {
+  if (UseParNewGC ||
+      UseG1GC ||
+      (UseConcMarkSweepGC && (CMSParallelInitialMarkEnabled || CMSParallelRemarkEnabled) && use_parallel_gc_threads())) {
     _workers = new FlexibleWorkGang("Parallel GC Threads", ParallelGCThreads,
                             /* are_GC_task_threads */true,
                             /* are_ConcurrentGC_threads */false);
diff --git a/src/share/vm/oops/constantPool.cpp b/src/share/vm/oops/constantPool.cpp
index f26c0d4..fb474ce 100644
--- a/src/share/vm/oops/constantPool.cpp
+++ b/src/share/vm/oops/constantPool.cpp
@@ -1779,11 +1779,22 @@
 
 void ConstantPool::set_on_stack(const bool value) {
   if (value) {
-    _flags |= _on_stack;
+    int old_flags = *const_cast<volatile int *>(&_flags);
+    while ((old_flags & _on_stack) == 0) {
+      int new_flags = old_flags | _on_stack;
+      int result = Atomic::cmpxchg(new_flags, &_flags, old_flags);
+
+      if (result == old_flags) {
+        // Succeeded.
+        MetadataOnStackMark::record(this, Thread::current());
+        return;
+      }
+      old_flags = result;
+    }
   } else {
+    // Clearing is only done by a single thread.
     _flags &= ~_on_stack;
   }
-  if (value) MetadataOnStackMark::record(this);
 }
 
 // JSR 292 support for patching constant pool oops after the class is linked and
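  The rewritten set_on_stack() above is a claim loop: only the thread whose cmpxchg flips
  _on_stack from clear to set calls MetadataOnStackMark::record(), so a ConstantPool is
  recorded at most once even when several GC workers race on it, while clearing stays a
  plain store because it happens single-threaded. A generic sketch of the same claim idiom
  (names are illustrative, not from the patch):

    // Returns true only for the single thread that actually sets the bit.
    bool claim_bit(volatile int* flags, int bit) {
      int old_flags = *flags;
      while ((old_flags & bit) == 0) {
        int result = Atomic::cmpxchg(old_flags | bit, flags, old_flags);
        if (result == old_flags) {
          return true;        // we won the race and set the bit
        }
        old_flags = result;   // lost the race; re-check the freshly read value
      }
      return false;           // the bit was already set by someone else
    }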
diff --git a/src/share/vm/oops/method.cpp b/src/share/vm/oops/method.cpp
index b715c36..ef0349e 100644
--- a/src/share/vm/oops/method.cpp
+++ b/src/share/vm/oops/method.cpp
@@ -1862,9 +1862,12 @@
 void Method::set_on_stack(const bool value) {
   // Set both the method itself and its constant pool.  The constant pool
   // on stack means some method referring to it is also on the stack.
-  _access_flags.set_on_stack(value);
   constants()->set_on_stack(value);
-  if (value) MetadataOnStackMark::record(this);
+
+  bool succeeded = _access_flags.set_on_stack(value);
+  if (value && succeeded) {
+    MetadataOnStackMark::record(this, Thread::current());
+  }
 }
 
 // Called when the class loader is unloaded to make all methods weak.
diff --git a/src/share/vm/prims/jni.cpp b/src/share/vm/prims/jni.cpp
index 398ad5d..7b6974e 100644
--- a/src/share/vm/prims/jni.cpp
+++ b/src/share/vm/prims/jni.cpp
@@ -3861,6 +3861,7 @@
 void TestBitMap_test();
 void TestAsUtf8();
 void Test_linked_list();
+void TestChunkedList_test();
 #if INCLUDE_ALL_GCS
 void TestOldFreeSpaceCalculation_test();
 void TestG1BiasedArray_test();
@@ -3894,6 +3895,7 @@
     run_unit_test(TestAsUtf8());
     run_unit_test(ObjectMonitor::sanity_checks());
     run_unit_test(Test_linked_list());
+    run_unit_test(TestChunkedList_test());
 #if INCLUDE_VM_STRUCTS
     run_unit_test(VMStructs::test());
 #endif
diff --git a/src/share/vm/runtime/arguments.cpp b/src/share/vm/runtime/arguments.cpp
index d62d286..346208f 100644
--- a/src/share/vm/runtime/arguments.cpp
+++ b/src/share/vm/runtime/arguments.cpp
@@ -3782,27 +3782,33 @@
   bool settings_file_specified = false;
   bool needs_hotspotrc_warning = false;
 
-  ArgumentsExt::process_options(args);
-
   const char* flags_file;
   int index;
   for (index = 0; index < args->nOptions; index++) {
     const JavaVMOption *option = args->options + index;
+    if (ArgumentsExt::process_options(option)) {
+      continue;
+    }
     if (match_option(option, "-XX:Flags=", &tail)) {
       flags_file = tail;
       settings_file_specified = true;
+      continue;
     }
     if (match_option(option, "-XX:+PrintVMOptions", &tail)) {
       PrintVMOptions = true;
+      continue;
     }
     if (match_option(option, "-XX:-PrintVMOptions", &tail)) {
       PrintVMOptions = false;
+      continue;
     }
     if (match_option(option, "-XX:+IgnoreUnrecognizedVMOptions", &tail)) {
       IgnoreUnrecognizedVMOptions = true;
+      continue;
     }
     if (match_option(option, "-XX:-IgnoreUnrecognizedVMOptions", &tail)) {
       IgnoreUnrecognizedVMOptions = false;
+      continue;
     }
     if (match_option(option, "-XX:+PrintFlagsInitial", &tail)) {
       CommandLineFlags::printFlags(tty, false);
@@ -3824,6 +3830,7 @@
       } else {
         vm_exit_during_initialization("Syntax error, expecting -XX:NativeMemoryTracking=[off|summary|detail]", NULL);
       }
+      continue;
     }
 #endif
 
diff --git a/src/share/vm/runtime/arguments_ext.hpp b/src/share/vm/runtime/arguments_ext.hpp
index b0fbcc3..4016182 100644
--- a/src/share/vm/runtime/arguments_ext.hpp
+++ b/src/share/vm/runtime/arguments_ext.hpp
@@ -34,7 +34,10 @@
   static inline bool check_gc_consistency_user();
   static inline bool check_gc_consistency_ergo();
   static inline bool check_vm_args_consistency();
-  static        void process_options(const JavaVMInitArgs* args) {}
+  // The argument processing extension. Returns true if the option was fully
+  // handled here and needs no additional parsing in Arguments::parse().
+  // Otherwise returns false.
+  static inline bool process_options(const JavaVMOption *option) { return false; }
 };
 
 void ArgumentsExt::select_gc_ergonomically() {
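  The new per-option hook lets an extension claim an option before the main parsing loop in
  arguments.cpp sees it. A hypothetical replacement body for the hook (the option name is
  made up; only the signature and the true/false contract come from this patch):

    static inline bool process_options(const JavaVMOption* option) {
      if (strcmp(option->optionString, "-XX:+UseVendorSpecificFeature") == 0) {
        // ... handle the vendor-specific flag here ...
        return true;   // fully handled; the caller issues 'continue' and skips it
      }
      return false;    // not ours; normal parsing proceeds
    }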
diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp
index a522867..7e7dc5c 100644
--- a/src/share/vm/runtime/thread.cpp
+++ b/src/share/vm/runtime/thread.cpp
@@ -201,6 +201,8 @@
   // This initial value ==> never claimed.
   _oops_do_parity = 0;
 
+  _metadata_on_stack_buffer = NULL;
+
   // the handle mark links itself to last_handle_mark
   new HandleMark(this);
 
diff --git a/src/share/vm/runtime/thread.hpp b/src/share/vm/runtime/thread.hpp
index 79f25e8..e1110f2 100644
--- a/src/share/vm/runtime/thread.hpp
+++ b/src/share/vm/runtime/thread.hpp
@@ -42,11 +42,10 @@
 #include "runtime/threadLocalStorage.hpp"
 #include "runtime/thread_ext.hpp"
 #include "runtime/unhandledOops.hpp"
-#include "utilities/macros.hpp"
-
 #include "trace/traceBackend.hpp"
 #include "trace/traceMacros.hpp"
 #include "utilities/exceptions.hpp"
+#include "utilities/macros.hpp"
 #include "utilities/top.hpp"
 #if INCLUDE_ALL_GCS
 #include "gc_implementation/g1/dirtyCardQueue.hpp"
@@ -83,6 +82,10 @@
 class ThreadClosure;
 class IdealGraphPrinter;
 
+class Metadata;
+template <class T, MEMFLAGS F> class ChunkedList;
+typedef ChunkedList<Metadata*, mtInternal> MetadataOnStackBuffer;
+
 DEBUG_ONLY(class ResourceMark;)
 
 class WorkerThread;
@@ -255,6 +258,9 @@
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
 
+  // Thread-local buffer used by MetadataOnStackMark.
+  MetadataOnStackBuffer* _metadata_on_stack_buffer;
+
   TRACE_DATA _trace_data;                       // Thread-local data for tracing
 
   ThreadExt _ext;
@@ -490,7 +496,10 @@
   // creation fails due to lack of memory, too many threads etc.
   bool set_as_starting_thread();
 
- protected:
+  void set_metadata_on_stack_buffer(MetadataOnStackBuffer* buffer) { _metadata_on_stack_buffer = buffer; }
+  MetadataOnStackBuffer* metadata_on_stack_buffer() const          { return _metadata_on_stack_buffer; }
+
+protected:
   // OS data associated with the thread
   OSThread* _osthread;  // Platform-specific thread information
 
diff --git a/src/share/vm/utilities/accessFlags.cpp b/src/share/vm/utilities/accessFlags.cpp
index e2d6039..0aeb011 100644
--- a/src/share/vm/utilities/accessFlags.cpp
+++ b/src/share/vm/utilities/accessFlags.cpp
@@ -47,6 +47,21 @@
   } while(f != old_flags);
 }
 
+// Returns true iff this thread succeeded in setting the bit.
+bool AccessFlags::atomic_set_one_bit(jint bit) {
+  // Atomically update the flags with the bit given
+  jint old_flags, new_flags, f;
+  bool is_setting_bit = false;
+  do {
+    old_flags = _flags;
+    new_flags = old_flags | bit;
+    is_setting_bit = old_flags != new_flags;
+    f = Atomic::cmpxchg(new_flags, &_flags, old_flags);
+  } while(f != old_flags);
+
+  return is_setting_bit;
+}
+
 #if !defined(PRODUCT) || INCLUDE_JVMTI
 
 void AccessFlags::print_on(outputStream* st) const {
diff --git a/src/share/vm/utilities/accessFlags.hpp b/src/share/vm/utilities/accessFlags.hpp
index 5b1ff17..4f0c030 100644
--- a/src/share/vm/utilities/accessFlags.hpp
+++ b/src/share/vm/utilities/accessFlags.hpp
@@ -172,6 +172,7 @@
 
   // Atomic update of flags
   void atomic_set_bits(jint bits);
+  bool atomic_set_one_bit(jint bit);
   void atomic_clear_bits(jint bits);
 
  private:
@@ -233,12 +234,13 @@
                                          atomic_set_bits(JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE);
                                        }
 
-  void set_on_stack(const bool value)
+  bool set_on_stack(const bool value)
                                        {
                                          if (value) {
-                                           atomic_set_bits(JVM_ACC_ON_STACK);
+                                           return atomic_set_one_bit(JVM_ACC_ON_STACK);
                                          } else {
                                            atomic_clear_bits(JVM_ACC_ON_STACK);
+                                           return true; // Ignored
                                          }
                                        }
   // Conversion
diff --git a/src/share/vm/utilities/chunkedList.cpp b/src/share/vm/utilities/chunkedList.cpp
new file mode 100644
index 0000000..caf80e7
--- /dev/null
+++ b/src/share/vm/utilities/chunkedList.cpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "utilities/chunkedList.hpp"
+#include "utilities/debug.hpp"
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+template <typename T>
+class TestChunkedList {
+  typedef ChunkedList<T, mtOther> ChunkedListT;
+
+ public:
+  static void testEmpty() {
+    ChunkedListT buffer;
+    assert(buffer.size() == 0, "assert");
+  }
+
+  static void testFull() {
+    ChunkedListT buffer;
+    for (uintptr_t i = 0; i < ChunkedListT::BufferSize; i++) {
+      buffer.push((T)i);
+    }
+    assert(buffer.size() == ChunkedListT::BufferSize, "assert");
+    assert(buffer.is_full(), "assert");
+  }
+
+  static void testSize() {
+    ChunkedListT buffer;
+    for (uintptr_t i = 0; i < ChunkedListT::BufferSize; i++) {
+      assert(buffer.size() == i, "assert");
+      buffer.push((T)i);
+      assert(buffer.size() == i + 1, "assert");
+    }
+  }
+
+  static void testClear() {
+    ChunkedListT buffer;
+
+    buffer.clear();
+    assert(buffer.size() == 0, "assert");
+
+    for (uintptr_t i = 0; i < ChunkedListT::BufferSize / 2; i++) {
+      buffer.push((T)i);
+    }
+    buffer.clear();
+    assert(buffer.size() == 0, "assert");
+
+    for (uintptr_t i = 0; i < ChunkedListT::BufferSize; i++) {
+      buffer.push((T)i);
+    }
+    buffer.clear();
+    assert(buffer.size() == 0, "assert");
+  }
+
+  static void testAt() {
+    ChunkedListT buffer;
+
+    for (uintptr_t i = 0; i < ChunkedListT::BufferSize; i++) {
+      buffer.push((T)i);
+      assert(buffer.at(i) == (T)i, "assert");
+    }
+
+    for (uintptr_t i = 0; i < ChunkedListT::BufferSize; i++) {
+      assert(buffer.at(i) == (T)i, "assert");
+    }
+  }
+
+  static void test() {
+    testEmpty();
+    testFull();
+    testSize();
+    testClear();
+    testAt();
+  }
+};
+
+class Metadata;
+
+void TestChunkedList_test() {
+  TestChunkedList<Metadata*>::test();
+  TestChunkedList<size_t>::test();
+}
+
+#endif
diff --git a/src/share/vm/utilities/chunkedList.hpp b/src/share/vm/utilities/chunkedList.hpp
new file mode 100644
index 0000000..06c5295
--- /dev/null
+++ b/src/share/vm/utilities/chunkedList.hpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_UTILITIES_CHUNKED_LIST_HPP
+#define SHARE_VM_UTILITIES_CHUNKED_LIST_HPP
+
+#include "memory/allocation.hpp"
+#include "utilities/debug.hpp"
+
+template <class T, MEMFLAGS F> class ChunkedList : public CHeapObj<F> {
+  template <class U> friend class TestChunkedList;
+
+  static const size_t BufferSize = 64;
+
+  T  _values[BufferSize];
+  T* _top;
+
+  ChunkedList<T, F>* _next_used;
+  ChunkedList<T, F>* _next_free;
+
+  T const * end() const {
+    return &_values[BufferSize];
+  }
+
+ public:
+  ChunkedList<T, F>() : _top(_values), _next_used(NULL), _next_free(NULL) {}
+
+  bool is_full() const {
+    return _top == end();
+  }
+
+  void clear() {
+    _top = _values;
+    // Don't clear the next pointers since that would interfere
+    // with other threads trying to iterate through the lists.
+  }
+
+  void push(T m) {
+    assert(!is_full(), "Buffer is full");
+    *_top = m;
+    _top++;
+  }
+
+  void set_next_used(ChunkedList<T, F>* buffer) { _next_used = buffer; }
+  void set_next_free(ChunkedList<T, F>* buffer) { _next_free = buffer; }
+
+  ChunkedList<T, F>* next_used() const          { return _next_used; }
+  ChunkedList<T, F>* next_free() const          { return _next_free; }
+
+  size_t size() const {
+    return pointer_delta(_top, _values, sizeof(T));
+  }
+
+  T at(size_t i) {
+    assert(i < size(), err_msg("IOOBE i: " SIZE_FORMAT " size(): " SIZE_FORMAT, i, size()));
+    return _values[i];
+  }
+};
+
+#endif // SHARE_VM_UTILITIES_CHUNKED_LIST_HPP
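  ChunkedList is a fixed 64-slot push buffer that the MetadataOnStackMark changes use as a
  per-thread staging area. A minimal usage sketch based only on the header above; the
  allocation, the some_metadata placeholder, and the chaining policy (how full chunks are
  linked and reused) are assumptions of this sketch, not mandated by the class:

    ChunkedList<Metadata*, mtInternal>* chunk = new ChunkedList<Metadata*, mtInternal>();

    while (!chunk->is_full()) {
      chunk->push(some_metadata);       // append until the 64-entry chunk is full
    }

    for (size_t i = 0; i < chunk->size(); i++) {
      Metadata* m = chunk->at(i);       // bounds-checked read-back
      // ... process m ...
    }

    chunk->clear();                     // reset _top for reuse; next pointers are left intact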
diff --git a/test/runtime/CommandLine/TestVMOptions.java b/test/runtime/CommandLine/TestVMOptions.java
new file mode 100644
index 0000000..354c6ed
--- /dev/null
+++ b/test/runtime/CommandLine/TestVMOptions.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8060256
+ * @summary Test various command line options
+ * @library /testlibrary
+ * @run main TestVMOptions
+ */
+
+import com.oracle.java.testlibrary.*;
+import java.io.File;
+
+public class TestVMOptions {
+  public static void main(String[] args) throws Exception {
+    ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(
+        "-XX:bogus",
+        "-XX:+IgnoreUnrecognizedVMOptions",
+        "-XX:+PrintFlagsInitial");
+    OutputAnalyzer output = new OutputAnalyzer(pb.start());
+    output.shouldContain("bool PrintGCDetails");
+
+    pb = ProcessTools.createJavaProcessBuilder(
+        "-XX:-PrintVMOptions", "-version");
+    output = new OutputAnalyzer(pb.start());
+    output.shouldContain("java version");
+
+    File dir = new File(System.getProperty("test.src", "."));
+    File file = new File(dir, "flagfile.txt");
+    String s = file.getAbsolutePath();
+    pb = ProcessTools.createJavaProcessBuilder("-XX:Flags="+s);
+    output = new OutputAnalyzer(pb.start());
+    output.shouldContain("VM option '-IgnoreUnrecognizedVMOptions'");
+  }
+}
diff --git a/test/runtime/CommandLine/flagfile.txt b/test/runtime/CommandLine/flagfile.txt
new file mode 100644
index 0000000..7123b35
--- /dev/null
+++ b/test/runtime/CommandLine/flagfile.txt
@@ -0,0 +1 @@
++PrintVMOptions -IgnoreUnrecognizedVMOptions