blob: a47d0cf1fc7486c3c99e8f0db0ecd3908e69ee43 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math.stat.inference;
import java.util.Collection;
import org.apache.commons.math.MathException;
import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.distribution.FDistribution;
import org.apache.commons.math.distribution.FDistributionImpl;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.summary.Sum;
import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
/**
* Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl}
* interface.
*
* <p>Uses the
* {@link org.apache.commons.math.distribution.FDistribution
* commons-math F Distribution implementation} to estimate exact p-values.</p>
*
* <p>This implementation is based on a description at
* http://faculty.vassar.edu/lowry/ch13pt1.html</p>
* <pre>
* Abbreviations: bg = between groups,
* wg = within groups,
* ss = sum squared deviations
* </pre>
*
* @since 1.2
* @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
*/
public class OneWayAnovaImpl implements OneWayAnova {
/**
* Default constructor.
*/
public OneWayAnovaImpl() {
}
/**
* {@inheritDoc}<p>
* This implementation computes the F statistic using the definitional
* formula<pre>
* F = msbg/mswg</pre>
* where<pre>
* msbg = between group mean square
* mswg = within group mean square</pre>
* are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
* here</a></p>
*/
public double anovaFValue(Collection<double[]> categoryData)
throws IllegalArgumentException, MathException {
AnovaStats a = anovaStats(categoryData);
return a.F;
}
/**
* {@inheritDoc}<p>
* This implementation uses the
* {@link org.apache.commons.math.distribution.FDistribution
* commons-math F Distribution implementation} to estimate the exact
* p-value, using the formula<pre>
* p = 1 - cumulativeProbability(F)</pre>
* where <code>F</code> is the F value and <code>cumulativeProbability</code>
* is the commons-math implementation of the F distribution.</p>
*/
public double anovaPValue(Collection<double[]> categoryData)
throws IllegalArgumentException, MathException {
AnovaStats a = anovaStats(categoryData);
FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
return 1.0 - fdist.cumulativeProbability(a.F);
}
/**
* {@inheritDoc}<p>
* This implementation uses the
* {@link org.apache.commons.math.distribution.FDistribution
* commons-math F Distribution implementation} to estimate the exact
* p-value, using the formula<pre>
* p = 1 - cumulativeProbability(F)</pre>
* where <code>F</code> is the F value and <code>cumulativeProbability</code>
* is the commons-math implementation of the F distribution.</p>
* <p>True is returned iff the estimated p-value is less than alpha.</p>
*/
public boolean anovaTest(Collection<double[]> categoryData, double alpha)
throws IllegalArgumentException, MathException {
if ((alpha <= 0) || (alpha > 0.5)) {
throw MathRuntimeException.createIllegalArgumentException(
LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
alpha, 0, 0.5);
}
return anovaPValue(categoryData) < alpha;
}
/**
* This method actually does the calculations (except P-value).
*
* @param categoryData <code>Collection</code> of <code>double[]</code>
* arrays each containing data for one category
* @return computed AnovaStats
* @throws IllegalArgumentException if categoryData does not meet
* preconditions specified in the interface definition
* @throws MathException if an error occurs computing the Anova stats
*/
private AnovaStats anovaStats(Collection<double[]> categoryData)
throws IllegalArgumentException, MathException {
// check if we have enough categories
if (categoryData.size() < 2) {
throw MathRuntimeException.createIllegalArgumentException(
LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
categoryData.size());
}
// check if each category has enough data and all is double[]
for (double[] array : categoryData) {
if (array.length <= 1) {
throw MathRuntimeException.createIllegalArgumentException(
LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
array.length);
}
}
int dfwg = 0;
double sswg = 0;
Sum totsum = new Sum();
SumOfSquares totsumsq = new SumOfSquares();
int totnum = 0;
for (double[] data : categoryData) {
Sum sum = new Sum();
SumOfSquares sumsq = new SumOfSquares();
int num = 0;
for (int i = 0; i < data.length; i++) {
double val = data[i];
// within category
num++;
sum.increment(val);
sumsq.increment(val);
// for all categories
totnum++;
totsum.increment(val);
totsumsq.increment(val);
}
dfwg += num - 1;
double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
sswg += ss;
}
double sst = totsumsq.getResult() - totsum.getResult() *
totsum.getResult()/totnum;
double ssbg = sst - sswg;
int dfbg = categoryData.size() - 1;
double msbg = ssbg/dfbg;
double mswg = sswg/dfwg;
double F = msbg/mswg;
return new AnovaStats(dfbg, dfwg, F);
}
/**
Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
No get/set methods provided.
*/
private static class AnovaStats {
/** Degrees of freedom in numerator (between groups). */
private int dfbg;
/** Degrees of freedom in denominator (within groups). */
private int dfwg;
/** Statistic. */
private double F;
/**
* Constructor
* @param dfbg degrees of freedom in numerator (between groups)
* @param dfwg degrees of freedom in denominator (within groups)
* @param F statistic
*/
private AnovaStats(int dfbg, int dfwg, double F) {
this.dfbg = dfbg;
this.dfwg = dfwg;
this.F = F;
}
}
}