// -*- C++ -*-
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/** @file parallel/multiway_mergesort.h
* @brief Parallel multiway merge sort.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
// Written by Johannes Singler.
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
#include <vector>
#include <parallel/basic_iterator.h>
#include <bits/stl_algo.h>
#include <parallel/parallel.h>
#include <parallel/multiway_merge.h>
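// Algorithm overview (a summary of the code below):
//  1. The input is divided into num_threads consecutive chunks (sd->starts).
//  2. Each thread copies its chunk into temporary storage and sorts it there.
//  3. The locally sorted runs are split consistently across threads, either
//     exactly (multiseq_partition) or approximately (by sampling).
//  4. Each thread multiway-merges its assigned pieces of all runs back into
//     the original sequence.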
namespace __gnu_parallel
{
/** @brief Subsequence description. */
template<typename _DifferenceTp>
struct Piece
{
typedef _DifferenceTp difference_type;
/** @brief Begin index of the subsequence. */
difference_type begin;
/** @brief End index of the subsequence (exclusive). */
difference_type end;
};
/** @brief Data accessed by all threads.
*
* PMWMS = parallel multiway mergesort */
template<typename RandomAccessIterator>
struct PMWMSSortingData
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
/** @brief Number of threads involved. */
thread_index_t num_threads;
/** @brief Input begin. */
RandomAccessIterator source;
/** @brief Start indices, per thread. */
difference_type* starts;
/** @brief Storage in which to sort. */
value_type** temporary;
/** @brief Samples. */
value_type* samples;
/** @brief Offsets to add to the found positions. */
difference_type* offsets;
/** @brief Pieces of data to merge @c [thread][sequence] */
std::vector<Piece<difference_type> >* pieces;
};
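// Note: pieces[t][s] is the half-open range [begin, end) of thread s's
// locally sorted run that thread t will merge; for a fixed run s, these
// ranges are adjacent and non-overlapping across the merging threads t.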
/**
* @brief Select samples from a sequence.
* @param sd Pointer to algorithm data. Result will be placed in
* @c sd->samples.
* @param num_samples Number of samples to select per thread.
*/
template<typename RandomAccessIterator, typename _DifferenceTp>
void
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
_DifferenceTp num_samples)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef _DifferenceTp difference_type;
thread_index_t iam = omp_get_thread_num();
difference_type* es = new difference_type[num_samples + 2];
equally_split(sd->starts[iam + 1] - sd->starts[iam],
num_samples + 1, es);
for (difference_type i = 0; i < num_samples; ++i)
::new(&(sd->samples[iam * num_samples + i]))
value_type(sd->source[sd->starts[iam] + es[i + 1]]);
delete[] es;
}
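// Illustration (hypothetical numbers): for a local chunk of 20 elements and
// num_samples == 3, equally_split yields the boundaries 0, 5, 10, 15, 20;
// the elements at local offsets es[1], es[2], es[3] == 5, 10, 15 become this
// thread's entries in sd->samples.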
/** @brief Split consistently. */
template<bool exact, typename RandomAccessIterator,
typename Comparator, typename SortingPlacesIterator>
struct split_consistently
{
};
/** @brief Split by exact splitting. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently
<true, RandomAccessIterator, Comparator, SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
const
{
# pragma omp barrier
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
for (thread_index_t s = 0; s < sd->num_threads; s++)
seqs[s] = std::make_pair(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]));
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
// if not last thread
if (iam < sd->num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp);
for (int seq = 0; seq < sd->num_threads; seq++)
{
// for each sequence
if (iam < (sd->num_threads - 1))
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
else
// very end of this sequence
sd->pieces[iam][seq].end =
sd->starts[seq + 1] - sd->starts[seq];
}
# pragma omp barrier
for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
{
// For each sequence.
if (iam > 0)
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
else
// Absolute beginning.
sd->pieces[iam][seq].begin = 0;
}
}
};
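// In the exact variant above, multiseq_partition determines, across all
// locally sorted runs, split positions whose combined length equals the
// global rank sd->starts[iam + 1]; because every thread partitions at its
// own rank, consecutive threads obtain adjacent, non-overlapping pieces of
// each run.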
/** @brief Split by sampling. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently<false, RandomAccessIterator, Comparator,
SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
const
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
determine_samples(sd, num_samples);
# pragma omp barrier
# pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * sd->num_threads),
comp);
# pragma omp barrier
for (thread_index_t s = 0; s < sd->num_threads; ++s)
{
// For each sequence.
if (num_samples * iam > 0)
sd->pieces[iam][s].begin =
std::lower_bound(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * iam],
comp)
- sd->temporary[s];
else
// Absolute beginning.
sd->pieces[iam][s].begin = 0;
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
sd->pieces[iam][s].end =
std::lower_bound(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * (iam + 1)],
comp)
- sd->temporary[s];
else
// Absolute end.
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
}
}
};
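// In the sampling variant above, the globally sorted sample array serves as
// a set of splitters: thread iam merges, from every run, the elements that
// fall between samples[num_samples * iam] and samples[num_samples * (iam + 1)]
// (the first and last thread extend to the run boundaries), with lower_bound
// locating each splitter inside each run.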
template<bool stable, typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort
{
};
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
{
void operator()(const RandomAccessIterator& begin,
const RandomAccessIterator& end, Comparator& comp) const
{
__gnu_sequential::stable_sort(begin, end, comp);
}
};
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
{
void operator()(const RandomAccessIterator begin,
const RandomAccessIterator end, Comparator& comp) const
{
__gnu_sequential::sort(begin, end, comp);
}
};
template<bool stable, typename SeqRandomAccessIterator,
typename RandomAccessIterator, typename Comparator,
typename DiffType>
struct possibly_stable_multiway_merge
{
};
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
DiffType>
{
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
{
stable_multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
sequential_tag());
}
};
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
DiffType>
{
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
{
multiway_merge(seqs_begin, seqs_end, target, length_am, comp,
sequential_tag());
}
};
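// possibly_stable_sort and possibly_stable_multiway_merge dispatch on the
// compile-time stable flag, forwarding to the stable or unstable sequential
// routines; the sequential_tag ensures that the merge performed inside each
// thread is itself sequential.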
/** @brief PMWMS code executed by each thread.
* @param sd Pointer to algorithm data.
* @param comp Comparator.
*/
template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
void
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
thread_index_t iam = omp_get_thread_num();
// Length of this thread's chunk, before merging.
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
// Sort in temporary storage, leave space for sentinel.
typedef value_type* SortingPlacesIterator;
sd->temporary[iam] =
static_cast<value_type*>(
::operator new(sizeof(value_type) * (length_local + 1)));
// Copy there.
std::uninitialized_copy(sd->source + sd->starts[iam],
sd->source + sd->starts[iam] + length_local,
sd->temporary[iam]);
possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
(sd->temporary[iam], sd->temporary[iam] + length_local, comp);
// Invariant: locally sorted subsequence in
// [sd->temporary[iam], sd->temporary[iam] + length_local).
// No barrier here: Synchronization is done by the splitting routine.
difference_type num_samples =
_Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
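// num_samples is the per-thread sample count; in the sampling variant,
// num_samples * sd->num_threads samples are collected in total, matching
// the size of the sd->samples allocation in parallel_sort_mwms below.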
split_consistently
<exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
(iam, sd, comp, num_samples);
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
for (thread_index_t s = 0; s < sd->num_threads; s++)
{
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
offset += sd->pieces[iam][s].begin;
}
typedef std::vector<
std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seq_vector_type;
seq_vector_type seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; ++s)
{
seqs[s] =
std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
sd->temporary[s] + sd->pieces[iam][s].end);
}
possibly_stable_multiway_merge<
stable,
typename seq_vector_type::iterator,
RandomAccessIterator,
Comparator, difference_type>()
(seqs.begin(), seqs.end(),
sd->source + offset, comp,
length_am);
# pragma omp barrier
::operator delete(sd->temporary[iam]);
}
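// Each thread writes its merged output directly into the original sequence
// at sd->source + offset, so no final copy step is needed; the barrier
// before ::operator delete keeps a thread from freeing its temporary run
// while other threads may still be reading from it during their merges.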
/** @brief PMWMS main call.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @param num_threads Number of threads to use.
*/
template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
{
_GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin;
if (n <= 1)
return;
// at least one element per thread
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
// shared variables
PMWMSSortingData<RandomAccessIterator> sd;
difference_type* starts;
# pragma omp parallel num_threads(num_threads)
{
num_threads = omp_get_num_threads(); // Number of threads actually granted; may be fewer than requested.
# pragma omp single
{
sd.num_threads = num_threads;
sd.source = begin;
sd.temporary = new value_type*[num_threads];
if (!exact)
{
difference_type size =
(_Settings::get().sort_mwms_oversampling * num_threads - 1)
* num_threads;
sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type)));
}
else
sd.samples = NULL;
sd.offsets = new difference_type[num_threads - 1];
sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
for (int s = 0; s < num_threads; ++s)
sd.pieces[s].resize(num_threads);
starts = sd.starts = new difference_type[num_threads + 1];
difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads;
difference_type pos = 0;
for (int i = 0; i < num_threads; ++i)
{
starts[i] = pos;
pos += (i < split) ? (chunk_length + 1) : chunk_length;
}
starts[num_threads] = pos;
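// These chunk borders match what equally_split would produce: the first
// (n % num_threads) chunks receive one extra element.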
} //single
// Now sort in parallel.
parallel_sort_mwms_pu<stable, exact>(&sd, comp);
} //parallel
delete[] starts;
delete[] sd.temporary;
if (!exact)
::operator delete(sd.samples);
delete[] sd.offsets;
delete[] sd.pieces;
}
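// Illustrative direct call (a sketch; in practice this routine is reached
// through the parallel-mode sort dispatch rather than called directly):
//
//   #include <vector>
//   #include <functional>
//   #include <omp.h>
//
//   std::vector<int> v = { 3, 1, 2 };
//   __gnu_parallel::parallel_sort_mwms</* stable */ false, /* exact */ true>
//     (v.begin(), v.end(), std::less<int>(), omp_get_max_threads());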
} //namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */