| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.commons.math.stat.descriptive; |
| |
| import java.io.Serializable; |
| import java.util.Collection; |
| import java.util.Iterator; |
| |
| /** |
| * <p> |
| * An aggregator for {@code SummaryStatistics} from several data sets or |
| * data set partitions. In its simplest usage mode, the client creates an |
| * instance via the zero-argument constructor, then uses |
| * {@link #createContributingStatistics()} to obtain a {@code SummaryStatistics} |
| * for each individual data set / partition. The per-set statistics objects |
| * are used as normal, and at any time the aggregate statistics for all the |
| * contributors can be obtained from this object. |
| * </p><p> |
| * Clients with specialized requirements can use alternative constructors to |
| * control the statistics implementations and initial values used by the |
| * contributing and the internal aggregate {@code SummaryStatistics} objects. |
| * </p><p> |
| * A static {@link #aggregate(Collection)} method is also included that computes |
| * aggregate statistics directly from a Collection of SummaryStatistics instances. |
| * </p><p> |
| * When {@link #createContributingStatistics()} is used to create SummaryStatistics |
| * instances to be aggregated concurrently, the created instances' |
| * {@link SummaryStatistics#addValue(double)} methods must synchronize on the aggregating |
| * instance maintained by this class. In multithreaded environments, if the functionality |
| * provided by {@link #aggregate(Collection)} is adequate, that method should be used |
| * to avoid unecessary computation and synchronization delays.</p> |
| * |
| * @since 2.0 |
| * @version $Revision: 811833 $ $Date: 2009-09-06 18:27:50 +0200 (dim. 06 sept. 2009) $ |
| * |
| */ |
| public class AggregateSummaryStatistics implements StatisticalSummary, |
| Serializable { |
| |
| |
| /** Serializable version identifier */ |
| private static final long serialVersionUID = -8207112444016386906L; |
| |
| /** |
| * A SummaryStatistics serving as a prototype for creating SummaryStatistics |
| * contributing to this aggregate |
| */ |
| private final SummaryStatistics statisticsPrototype; |
| |
| /** |
| * The SummaryStatistics in which aggregate statistics are accumulated. |
| */ |
| private final SummaryStatistics statistics; |
| |
| /** |
| * Initializes a new AggregateSummaryStatistics with default statistics |
| * implementations. |
| * |
| */ |
| public AggregateSummaryStatistics() { |
| this(new SummaryStatistics()); |
| } |
| |
| /** |
| * Initializes a new AggregateSummaryStatistics with the specified statistics |
| * object as a prototype for contributing statistics and for the internal |
| * aggregate statistics. This provides for customized statistics implementations |
| * to be used by contributing and aggregate statistics. |
| * |
| * @param prototypeStatistics a {@code SummaryStatistics} serving as a |
| * prototype both for the internal aggregate statistics and for |
| * contributing statistics obtained via the |
| * {@code createContributingStatistics()} method. Being a prototype |
| * means that other objects are initialized by copying this object's state. |
| * If {@code null}, a new, default statistics object is used. Any statistic |
| * values in the prototype are propagated to contributing statistics |
| * objects and (once) into these aggregate statistics. |
| * @see #createContributingStatistics() |
| */ |
| public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics) { |
| this(prototypeStatistics, |
| prototypeStatistics == null ? null : new SummaryStatistics(prototypeStatistics)); |
| } |
| |
| /** |
| * Initializes a new AggregateSummaryStatistics with the specified statistics |
| * object as a prototype for contributing statistics and for the internal |
| * aggregate statistics. This provides for different statistics implementations |
| * to be used by contributing and aggregate statistics and for an initial |
| * state to be supplied for the aggregate statistics. |
| * |
| * @param prototypeStatistics a {@code SummaryStatistics} serving as a |
| * prototype both for the internal aggregate statistics and for |
| * contributing statistics obtained via the |
| * {@code createContributingStatistics()} method. Being a prototype |
| * means that other objects are initialized by copying this object's state. |
| * If {@code null}, a new, default statistics object is used. Any statistic |
| * values in the prototype are propagated to contributing statistics |
| * objects, but not into these aggregate statistics. |
| * @param initialStatistics a {@code SummaryStatistics} to serve as the |
| * internal aggregate statistics object. If {@code null}, a new, default |
| * statistics object is used. |
| * @see #createContributingStatistics() |
| */ |
| public AggregateSummaryStatistics(SummaryStatistics prototypeStatistics, |
| SummaryStatistics initialStatistics) { |
| this.statisticsPrototype = |
| (prototypeStatistics == null) ? new SummaryStatistics() : prototypeStatistics; |
| this.statistics = |
| (initialStatistics == null) ? new SummaryStatistics() : initialStatistics; |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns the maximum over all the aggregated |
| * data. |
| * |
| * @see StatisticalSummary#getMax() |
| */ |
| public double getMax() { |
| synchronized (statistics) { |
| return statistics.getMax(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns the mean of all the aggregated data. |
| * |
| * @see StatisticalSummary#getMean() |
| */ |
| public double getMean() { |
| synchronized (statistics) { |
| return statistics.getMean(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns the minimum over all the aggregated |
| * data. |
| * |
| * @see StatisticalSummary#getMin() |
| */ |
| public double getMin() { |
| synchronized (statistics) { |
| return statistics.getMin(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns a count of all the aggregated data. |
| * |
| * @see StatisticalSummary#getN() |
| */ |
| public long getN() { |
| synchronized (statistics) { |
| return statistics.getN(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns the standard deviation of all the |
| * aggregated data. |
| * |
| * @see StatisticalSummary#getStandardDeviation() |
| */ |
| public double getStandardDeviation() { |
| synchronized (statistics) { |
| return statistics.getStandardDeviation(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns a sum of all the aggregated data. |
| * |
| * @see StatisticalSummary#getSum() |
| */ |
| public double getSum() { |
| synchronized (statistics) { |
| return statistics.getSum(); |
| } |
| } |
| |
| /** |
| * {@inheritDoc}. This version returns the variance of all the aggregated |
| * data. |
| * |
| * @see StatisticalSummary#getVariance() |
| */ |
| public double getVariance() { |
| synchronized (statistics) { |
| return statistics.getVariance(); |
| } |
| } |
| |
| /** |
| * Returns the sum of the logs of all the aggregated data. |
| * |
| * @return the sum of logs |
| * @see SummaryStatistics#getSumOfLogs() |
| */ |
| public double getSumOfLogs() { |
| synchronized (statistics) { |
| return statistics.getSumOfLogs(); |
| } |
| } |
| |
| /** |
| * Returns the geometric mean of all the aggregated data. |
| * |
| * @return the geometric mean |
| * @see SummaryStatistics#getGeometricMean() |
| */ |
| public double getGeometricMean() { |
| synchronized (statistics) { |
| return statistics.getGeometricMean(); |
| } |
| } |
| |
| /** |
| * Returns the sum of the squares of all the aggregated data. |
| * |
| * @return The sum of squares |
| * @see SummaryStatistics#getSumsq() |
| */ |
| public double getSumsq() { |
| synchronized (statistics) { |
| return statistics.getSumsq(); |
| } |
| } |
| |
| /** |
| * Returns a statistic related to the Second Central Moment. Specifically, |
| * what is returned is the sum of squared deviations from the sample mean |
| * among the all of the aggregated data. |
| * |
| * @return second central moment statistic |
| * @see SummaryStatistics#getSecondMoment() |
| */ |
| public double getSecondMoment() { |
| synchronized (statistics) { |
| return statistics.getSecondMoment(); |
| } |
| } |
| |
| /** |
| * Return a {@link StatisticalSummaryValues} instance reporting current |
| * aggregate statistics. |
| * |
| * @return Current values of aggregate statistics |
| */ |
| public StatisticalSummary getSummary() { |
| synchronized (statistics) { |
| return new StatisticalSummaryValues(getMean(), getVariance(), getN(), |
| getMax(), getMin(), getSum()); |
| } |
| } |
| |
| /** |
| * Creates and returns a {@code SummaryStatistics} whose data will be |
| * aggregated with those of this {@code AggregateSummaryStatistics}. |
| * |
| * @return a {@code SummaryStatistics} whose data will be aggregated with |
| * those of this {@code AggregateSummaryStatistics}. The initial state |
| * is a copy of the configured prototype statistics. |
| */ |
| public SummaryStatistics createContributingStatistics() { |
| SummaryStatistics contributingStatistics |
| = new AggregatingSummaryStatistics(statistics); |
| |
| SummaryStatistics.copy(statisticsPrototype, contributingStatistics); |
| |
| return contributingStatistics; |
| } |
| |
| /** |
| * Computes aggregate summary statistics. This method can be used to combine statistics |
| * computed over partitions or subsamples - i.e., the StatisticalSummaryValues returned |
| * should contain the same values that would have been obtained by computing a single |
| * StatisticalSummary over the combined dataset. |
| * <p> |
| * Returns null if the collection is empty or null. |
| * </p> |
| * |
| * @param statistics collection of SummaryStatistics to aggregate |
| * @return summary statistics for the combined dataset |
| */ |
| public static StatisticalSummaryValues aggregate(Collection<SummaryStatistics> statistics) { |
| if (statistics == null) { |
| return null; |
| } |
| Iterator<SummaryStatistics> iterator = statistics.iterator(); |
| if (!iterator.hasNext()) { |
| return null; |
| } |
| SummaryStatistics current = iterator.next(); |
| long n = current.getN(); |
| double min = current.getMin(); |
| double sum = current.getSum(); |
| double max = current.getMax(); |
| double m2 = current.getSecondMoment(); |
| double mean = current.getMean(); |
| while (iterator.hasNext()) { |
| current = iterator.next(); |
| if (current.getMin() < min || Double.isNaN(min)) { |
| min = current.getMin(); |
| } |
| if (current.getMax() > max || Double.isNaN(max)) { |
| max = current.getMax(); |
| } |
| sum += current.getSum(); |
| final double oldN = n; |
| final double curN = current.getN(); |
| n += curN; |
| final double meanDiff = current.getMean() - mean; |
| mean = sum / n; |
| m2 = m2 + current.getSecondMoment() + meanDiff * meanDiff * oldN * curN / n; |
| } |
| final double variance; |
| if (n == 0) { |
| variance = Double.NaN; |
| } else if (n == 1) { |
| variance = 0d; |
| } else { |
| variance = m2 / (n - 1); |
| } |
| return new StatisticalSummaryValues(mean, variance, n, max, min, sum); |
| } |
| |
| /** |
| * A SummaryStatistics that also forwards all values added to it to a second |
| * {@code SummaryStatistics} for aggregation. |
| * |
| * @since 2.0 |
| */ |
| private static class AggregatingSummaryStatistics extends SummaryStatistics { |
| |
| /** |
| * The serialization version of this class |
| */ |
| private static final long serialVersionUID = 1L; |
| |
| /** |
| * An additional SummaryStatistics into which values added to these |
| * statistics (and possibly others) are aggregated |
| */ |
| private final SummaryStatistics aggregateStatistics; |
| |
| /** |
| * Initializes a new AggregatingSummaryStatistics with the specified |
| * aggregate statistics object |
| * |
| * @param aggregateStatistics a {@code SummaryStatistics} into which |
| * values added to this statistics object should be aggregated |
| */ |
| public AggregatingSummaryStatistics(SummaryStatistics aggregateStatistics) { |
| this.aggregateStatistics = aggregateStatistics; |
| } |
| |
| /** |
| * {@inheritDoc}. This version adds the provided value to the configured |
| * aggregate after adding it to these statistics. |
| * |
| * @see SummaryStatistics#addValue(double) |
| */ |
| @Override |
| public void addValue(double value) { |
| super.addValue(value); |
| synchronized (aggregateStatistics) { |
| aggregateStatistics.addValue(value); |
| } |
| } |
| |
| /** |
| * Returns true iff <code>object</code> is a |
| * <code>SummaryStatistics</code> instance and all statistics have the |
| * same values as this. |
| * @param object the object to test equality against. |
| * @return true if object equals this |
| */ |
| @Override |
| public boolean equals(Object object) { |
| if (object == this) { |
| return true; |
| } |
| if (object instanceof AggregatingSummaryStatistics == false) { |
| return false; |
| } |
| AggregatingSummaryStatistics stat = (AggregatingSummaryStatistics)object; |
| return super.equals(stat) && |
| aggregateStatistics.equals(stat.aggregateStatistics); |
| } |
| |
| /** |
| * Returns hash code based on values of statistics |
| * @return hash code |
| */ |
| @Override |
| public int hashCode() { |
| return 123 + super.hashCode() + aggregateStatistics.hashCode(); |
| } |
| } |
| } |