| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.commons.math.stat.ranking; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Iterator; |
| import java.util.List; |
| |
| import org.apache.commons.math.exception.MathInternalError; |
| import org.apache.commons.math.random.RandomData; |
| import org.apache.commons.math.random.RandomDataImpl; |
| import org.apache.commons.math.random.RandomGenerator; |
| import org.apache.commons.math.util.FastMath; |
| |
| |
| /** |
| * <p> Ranking based on the natural ordering on doubles.</p> |
| * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties |
| * are handled using the selected {@link TiesStrategy}. |
| * Configuration settings are supplied in optional constructor arguments. |
| * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE}, |
| * respectively. When using {@link TiesStrategy#RANDOM}, a |
| * {@link RandomGenerator} may be supplied as a constructor argument.</p> |
| * <p>Examples: |
| * <table border="1" cellpadding="3"> |
| * <tr><th colspan="3"> |
| * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) |
| * </th></tr> |
| * <tr><th>NaNStrategy</th><th>TiesStrategy</th> |
| * <th><code>rank(data)</code></th> |
| * <tr> |
| * <td>default (NaNs maximal)</td> |
| * <td>default (ties averaged)</td> |
| * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr> |
| * <tr> |
| * <td>default (NaNs maximal)</td> |
| * <td>MINIMUM</td> |
| * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr> |
| * <tr> |
| * <td>MINIMAL</td> |
| * <td>default (ties averaged)</td> |
| * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr> |
| * <tr> |
| * <td>REMOVED</td> |
| * <td>SEQUENTIAL</td> |
| * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr> |
| * <tr> |
| * <td>MINIMAL</td> |
| * <td>MAXIMUM</td> |
| * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p> |
| * |
| * @since 2.0 |
| * @version $Revision: 1061496 $ $Date: 2011-01-20 21:32:16 +0100 (jeu. 20 janv. 2011) $ |
| */ |
| public class NaturalRanking implements RankingAlgorithm { |
| |
| /** default NaN strategy */ |
| public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL; |
| |
| /** default ties strategy */ |
| public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; |
| |
| /** NaN strategy - defaults to NaNs maximal */ |
| private final NaNStrategy nanStrategy; |
| |
| /** Ties strategy - defaults to ties averaged */ |
| private final TiesStrategy tiesStrategy; |
| |
| /** Source of random data - used only when ties strategy is RANDOM */ |
| private final RandomData randomData; |
| |
| /** |
| * Create a NaturalRanking with default strategies for handling ties and NaNs. |
| */ |
| public NaturalRanking() { |
| super(); |
| tiesStrategy = DEFAULT_TIES_STRATEGY; |
| nanStrategy = DEFAULT_NAN_STRATEGY; |
| randomData = null; |
| } |
| |
| /** |
| * Create a NaturalRanking with the given TiesStrategy. |
| * |
| * @param tiesStrategy the TiesStrategy to use |
| */ |
| public NaturalRanking(TiesStrategy tiesStrategy) { |
| super(); |
| this.tiesStrategy = tiesStrategy; |
| nanStrategy = DEFAULT_NAN_STRATEGY; |
| randomData = new RandomDataImpl(); |
| } |
| |
| /** |
| * Create a NaturalRanking with the given NaNStrategy. |
| * |
| * @param nanStrategy the NaNStrategy to use |
| */ |
| public NaturalRanking(NaNStrategy nanStrategy) { |
| super(); |
| this.nanStrategy = nanStrategy; |
| tiesStrategy = DEFAULT_TIES_STRATEGY; |
| randomData = null; |
| } |
| |
| /** |
| * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. |
| * |
| * @param nanStrategy NaNStrategy to use |
| * @param tiesStrategy TiesStrategy to use |
| */ |
| public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) { |
| super(); |
| this.nanStrategy = nanStrategy; |
| this.tiesStrategy = tiesStrategy; |
| randomData = new RandomDataImpl(); |
| } |
| |
| /** |
| * Create a NaturalRanking with TiesStrategy.RANDOM and the given |
| * RandomGenerator as the source of random data. |
| * |
| * @param randomGenerator source of random data |
| */ |
| public NaturalRanking(RandomGenerator randomGenerator) { |
| super(); |
| this.tiesStrategy = TiesStrategy.RANDOM; |
| nanStrategy = DEFAULT_NAN_STRATEGY; |
| randomData = new RandomDataImpl(randomGenerator); |
| } |
| |
| |
| /** |
| * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM |
| * and the given source of random data. |
| * |
| * @param nanStrategy NaNStrategy to use |
| * @param randomGenerator source of random data |
| */ |
| public NaturalRanking(NaNStrategy nanStrategy, |
| RandomGenerator randomGenerator) { |
| super(); |
| this.nanStrategy = nanStrategy; |
| this.tiesStrategy = TiesStrategy.RANDOM; |
| randomData = new RandomDataImpl(randomGenerator); |
| } |
| |
| /** |
| * Return the NaNStrategy |
| * |
| * @return returns the NaNStrategy |
| */ |
| public NaNStrategy getNanStrategy() { |
| return nanStrategy; |
| } |
| |
| /** |
| * Return the TiesStrategy |
| * |
| * @return the TiesStrategy |
| */ |
| public TiesStrategy getTiesStrategy() { |
| return tiesStrategy; |
| } |
| |
| /** |
| * Rank <code>data</code> using the natural ordering on Doubles, with |
| * NaN values handled according to <code>nanStrategy</code> and ties |
| * resolved using <code>tiesStrategy.</code> |
| * |
| * @param data array to be ranked |
| * @return array of ranks |
| */ |
| public double[] rank(double[] data) { |
| |
| // Array recording initial positions of data to be ranked |
| IntDoublePair[] ranks = new IntDoublePair[data.length]; |
| for (int i = 0; i < data.length; i++) { |
| ranks[i] = new IntDoublePair(data[i], i); |
| } |
| |
| // Recode, remove or record positions of NaNs |
| List<Integer> nanPositions = null; |
| switch (nanStrategy) { |
| case MAXIMAL: // Replace NaNs with +INFs |
| recodeNaNs(ranks, Double.POSITIVE_INFINITY); |
| break; |
| case MINIMAL: // Replace NaNs with -INFs |
| recodeNaNs(ranks, Double.NEGATIVE_INFINITY); |
| break; |
| case REMOVED: // Drop NaNs from data |
| ranks = removeNaNs(ranks); |
| break; |
| case FIXED: // Record positions of NaNs |
| nanPositions = getNanPositions(ranks); |
| break; |
| default: // this should not happen unless NaNStrategy enum is changed |
| throw new MathInternalError(); |
| } |
| |
| // Sort the IntDoublePairs |
| Arrays.sort(ranks); |
| |
| // Walk the sorted array, filling output array using sorted positions, |
| // resolving ties as we go |
| double[] out = new double[ranks.length]; |
| int pos = 1; // position in sorted array |
| out[ranks[0].getPosition()] = pos; |
| List<Integer> tiesTrace = new ArrayList<Integer>(); |
| tiesTrace.add(ranks[0].getPosition()); |
| for (int i = 1; i < ranks.length; i++) { |
| if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { |
| // tie sequence has ended (or had length 1) |
| pos = i + 1; |
| if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve |
| resolveTie(out, tiesTrace); |
| } |
| tiesTrace = new ArrayList<Integer>(); |
| tiesTrace.add(ranks[i].getPosition()); |
| } else { |
| // tie sequence continues |
| tiesTrace.add(ranks[i].getPosition()); |
| } |
| out[ranks[i].getPosition()] = pos; |
| } |
| if (tiesTrace.size() > 1) { // handle tie sequence at end |
| resolveTie(out, tiesTrace); |
| } |
| if (nanStrategy == NaNStrategy.FIXED) { |
| restoreNaNs(out, nanPositions); |
| } |
| return out; |
| } |
| |
| /** |
| * Returns an array that is a copy of the input array with IntDoublePairs |
| * having NaN values removed. |
| * |
| * @param ranks input array |
| * @return array with NaN-valued entries removed |
| */ |
| private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { |
| if (!containsNaNs(ranks)) { |
| return ranks; |
| } |
| IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; |
| int j = 0; |
| for (int i = 0; i < ranks.length; i++) { |
| if (Double.isNaN(ranks[i].getValue())) { |
| // drop, but adjust original ranks of later elements |
| for (int k = i + 1; k < ranks.length; k++) { |
| ranks[k] = new IntDoublePair( |
| ranks[k].getValue(), ranks[k].getPosition() - 1); |
| } |
| } else { |
| outRanks[j] = new IntDoublePair( |
| ranks[i].getValue(), ranks[i].getPosition()); |
| j++; |
| } |
| } |
| IntDoublePair[] returnRanks = new IntDoublePair[j]; |
| System.arraycopy(outRanks, 0, returnRanks, 0, j); |
| return returnRanks; |
| } |
| |
| /** |
| * Recodes NaN values to the given value. |
| * |
| * @param ranks array to recode |
| * @param value the value to replace NaNs with |
| */ |
| private void recodeNaNs(IntDoublePair[] ranks, double value) { |
| for (int i = 0; i < ranks.length; i++) { |
| if (Double.isNaN(ranks[i].getValue())) { |
| ranks[i] = new IntDoublePair( |
| value, ranks[i].getPosition()); |
| } |
| } |
| } |
| |
| /** |
| * Checks for presence of NaNs in <code>ranks.</code> |
| * |
| * @param ranks array to be searched for NaNs |
| * @return true iff ranks contains one or more NaNs |
| */ |
| private boolean containsNaNs(IntDoublePair[] ranks) { |
| for (int i = 0; i < ranks.length; i++) { |
| if (Double.isNaN(ranks[i].getValue())) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /** |
| * Resolve a sequence of ties, using the configured {@link TiesStrategy}. |
| * The input <code>ranks</code> array is expected to take the same value |
| * for all indices in <code>tiesTrace</code>. The common value is recoded |
| * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>, |
| * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged. |
| * The same array and trace with tiesStrategy AVERAGE will come out |
| * <5,8,3,6,3,7,1,3>. |
| * |
| * @param ranks array of ranks |
| * @param tiesTrace list of indices where <code>ranks</code> is constant |
| * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] |
| * </code> |
| */ |
| private void resolveTie(double[] ranks, List<Integer> tiesTrace) { |
| |
| // constant value of ranks over tiesTrace |
| final double c = ranks[tiesTrace.get(0)]; |
| |
| // length of sequence of tied ranks |
| final int length = tiesTrace.size(); |
| |
| switch (tiesStrategy) { |
| case AVERAGE: // Replace ranks with average |
| fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); |
| break; |
| case MAXIMUM: // Replace ranks with maximum values |
| fill(ranks, tiesTrace, c + length - 1); |
| break; |
| case MINIMUM: // Replace ties with minimum |
| fill(ranks, tiesTrace, c); |
| break; |
| case RANDOM: // Fill with random integral values in [c, c + length - 1] |
| Iterator<Integer> iterator = tiesTrace.iterator(); |
| long f = FastMath.round(c); |
| while (iterator.hasNext()) { |
| ranks[iterator.next()] = |
| randomData.nextLong(f, f + length - 1); |
| } |
| break; |
| case SEQUENTIAL: // Fill sequentially from c to c + length - 1 |
| // walk and fill |
| iterator = tiesTrace.iterator(); |
| f = FastMath.round(c); |
| int i = 0; |
| while (iterator.hasNext()) { |
| ranks[iterator.next()] = f + i++; |
| } |
| break; |
| default: // this should not happen unless TiesStrategy enum is changed |
| throw new MathInternalError(); |
| } |
| } |
| |
| /** |
| * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code> |
| * |
| * @param data array to modify |
| * @param tiesTrace list of index values to set |
| * @param value value to set |
| */ |
| private void fill(double[] data, List<Integer> tiesTrace, double value) { |
| Iterator<Integer> iterator = tiesTrace.iterator(); |
| while (iterator.hasNext()) { |
| data[iterator.next()] = value; |
| } |
| } |
| |
| /** |
| * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code> |
| * |
| * @param ranks array to modify |
| * @param nanPositions list of index values to set to <code>Double.NaN</code> |
| */ |
| private void restoreNaNs(double[] ranks, List<Integer> nanPositions) { |
| if (nanPositions.size() == 0) { |
| return; |
| } |
| Iterator<Integer> iterator = nanPositions.iterator(); |
| while (iterator.hasNext()) { |
| ranks[iterator.next().intValue()] = Double.NaN; |
| } |
| |
| } |
| |
| /** |
| * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code> |
| * |
| * @param ranks array to search for <code>NaNs</code> |
| * @return list of indexes i such that <code>ranks[i] = NaN</code> |
| */ |
| private List<Integer> getNanPositions(IntDoublePair[] ranks) { |
| ArrayList<Integer> out = new ArrayList<Integer>(); |
| for (int i = 0; i < ranks.length; i++) { |
| if (Double.isNaN(ranks[i].getValue())) { |
| out.add(Integer.valueOf(i)); |
| } |
| } |
| return out; |
| } |
| |
| /** |
| * Represents the position of a double value in an ordering. |
| * Comparable interface is implemented so Arrays.sort can be used |
| * to sort an array of IntDoublePairs by value. Note that the |
| * implicitly defined natural ordering is NOT consistent with equals. |
| */ |
| private static class IntDoublePair implements Comparable<IntDoublePair> { |
| |
| /** Value of the pair */ |
| private final double value; |
| |
| /** Original position of the pair */ |
| private final int position; |
| |
| /** |
| * Construct an IntDoublePair with the given value and position. |
| * @param value the value of the pair |
| * @param position the original position |
| */ |
| public IntDoublePair(double value, int position) { |
| this.value = value; |
| this.position = position; |
| } |
| |
| /** |
| * Compare this IntDoublePair to another pair. |
| * Only the <strong>values</strong> are compared. |
| * |
| * @param other the other pair to compare this to |
| * @return result of <code>Double.compare(value, other.value)</code> |
| */ |
| public int compareTo(IntDoublePair other) { |
| return Double.compare(value, other.value); |
| } |
| |
| /** |
| * Returns the value of the pair. |
| * @return value |
| */ |
| public double getValue() { |
| return value; |
| } |
| |
| /** |
| * Returns the original position of the pair. |
| * @return position |
| */ |
| public int getPosition() { |
| return position; |
| } |
| } |
| } |