src/main/java/org/apache/commons/math/stat/inference/TTestImpl.java - platform/external/apache-commons-math - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.math.stat.inference;

 import org.apache.commons.math.MathException;
 import org.apache.commons.math.MathRuntimeException;
 import org.apache.commons.math.distribution.TDistribution;
 import org.apache.commons.math.distribution.TDistributionImpl;
 import org.apache.commons.math.exception.util.LocalizedFormats;
 import org.apache.commons.math.stat.StatUtils;
 import org.apache.commons.math.stat.descriptive.StatisticalSummary;
 import org.apache.commons.math.util.FastMath;

 /**
  * Implements t-test statistics defined in the {@link TTest} interface.
  * <p>
  * Uses commons-math {@link org.apache.commons.math.distribution.TDistributionImpl}
  * implementation to estimate exact p-values.</p>
  *
  * @version $Revision: 1042336 $ $Date: 2010-12-05 13:40:48 +0100 (dim. 05 déc. 2010) $
  */
 public class TTestImpl implements TTest  {

     /** Distribution used to compute inference statistics.
      * @deprecated in 2.2 (to be removed in 3.0).
      */
     @Deprecated
     private TDistribution distribution;

     /**
      * Default constructor.
      */
     public TTestImpl() {
         this(new TDistributionImpl(1.0));
     }

     /**
      * Create a test instance using the given distribution for computing
      * inference statistics.
      * @param t distribution used to compute inference statistics.
      * @since 1.2
      * @deprecated in 2.2 (to be removed in 3.0).
      */
     @Deprecated
     public TTestImpl(TDistribution t) {
         super();
         setDistribution(t);
     }

     /**
      * Computes a paired, 2-sample t-statistic based on the data in the input
      * arrays.  The t-statistic returned is equivalent to what would be returned by
      * computing the one-sample t-statistic {@link #t(double, double[])}, with
      * <code>mu = 0</code> and the sample array consisting of the (signed)
      * differences between corresponding entries in <code>sample1</code> and
      * <code>sample2.</code>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The input arrays must have the same length and their common length
      * must be at least 2.
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @return t statistic
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if the statistic can not be computed do to a
      *         convergence or other numerical error.
      */
     public double pairedT(double[] sample1, double[] sample2)
         throws IllegalArgumentException, MathException {
         checkSampleData(sample1);
         checkSampleData(sample2);
         double meanDifference = StatUtils.meanDifference(sample1, sample2);
         return t(meanDifference, 0,
                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
                 sample1.length);
     }

      /**
      * Returns the <i>observed significance level</i>, or
      * <i> p-value</i>, associated with a paired, two-sample, two-tailed t-test
      * based on the data in the input arrays.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the mean of the paired
      * differences is 0 in favor of the two-sided alternative that the mean paired
      * difference is not equal to 0. For a one-sided test, divide the returned
      * value by 2.</p>
      * <p>
      * This test is equivalent to a one-sample t-test computed using
      * {@link #tTest(double, double[])} with <code>mu = 0</code> and the sample
      * array consisting of the signed differences between corresponding elements of
      * <code>sample1</code> and <code>sample2.</code></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the p-value depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The input array lengths must be the same and their common length must
      * be at least 2.
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @return p-value for t-test
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double pairedTTest(double[] sample1, double[] sample2)
         throws IllegalArgumentException, MathException {
         double meanDifference = StatUtils.meanDifference(sample1, sample2);
         return tTest(meanDifference, 0,
                 StatUtils.varianceDifference(sample1, sample2, meanDifference),
                 sample1.length);
     }

      /**
      * Performs a paired t-test evaluating the null hypothesis that the
      * mean of the paired differences between <code>sample1</code> and
      * <code>sample2</code> is 0 in favor of the two-sided alternative that the
      * mean paired difference is not equal to 0, with significance level
      * <code>alpha</code>.
      * <p>
      * Returns <code>true</code> iff the null hypothesis can be rejected with
      * confidence <code>1 - alpha</code>.  To perform a 1-sided test, use
      * <code>alpha * 2</code></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The input array lengths must be the same and their common length
      * must be at least 2.
      * </li>
      * <li> <code> 0 < alpha < 0.5 </code>
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @param alpha significance level of the test
      * @return true if the null hypothesis can be rejected with
      * confidence 1 - alpha
      * @throws IllegalArgumentException if the preconditions are not met
      * @throws MathException if an error occurs performing the test
      */
     public boolean pairedTTest(double[] sample1, double[] sample2, double alpha)
         throws IllegalArgumentException, MathException {
         checkSignificanceLevel(alpha);
         return pairedTTest(sample1, sample2) < alpha;
     }

     /**
      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
      * t statistic </a> given observed values and a comparison constant.
      * <p>
      * This statistic can be used to perform a one sample t-test for the mean.
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array length must be at least 2.
      * </li></ul></p>
      *
      * @param mu comparison constant
      * @param observed array of values
      * @return t statistic
      * @throws IllegalArgumentException if input array length is less than 2
      */
     public double t(double mu, double[] observed)
     throws IllegalArgumentException {
         checkSampleData(observed);
         return t(StatUtils.mean(observed), mu, StatUtils.variance(observed),
                 observed.length);
     }

     /**
      * Computes a <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc22.htm#formula">
      * t statistic </a> to use in comparing the mean of the dataset described by
      * <code>sampleStats</code> to <code>mu</code>.
      * <p>
      * This statistic can be used to perform a one sample t-test for the mean.
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li><code>observed.getN() > = 2</code>.
      * </li></ul></p>
      *
      * @param mu comparison constant
      * @param sampleStats DescriptiveStatistics holding sample summary statitstics
      * @return t statistic
      * @throws IllegalArgumentException if the precondition is not met
      */
     public double t(double mu, StatisticalSummary sampleStats)
     throws IllegalArgumentException {
         checkSampleData(sampleStats);
         return t(sampleStats.getMean(), mu, sampleStats.getVariance(),
                 sampleStats.getN());
     }

     /**
      * Computes a 2-sample t statistic,  under the hypothesis of equal
      * subpopulation variances.  To compute a t-statistic without the
      * equal variances hypothesis, use {@link #t(double[], double[])}.
      * <p>
      * This statistic can be used to perform a (homoscedastic) two-sample
      * t-test to compare sample means.</p>
      * <p>
      * The t-statisitc is</p>
      * <p>
      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
      * </p><p>
      * where <strong><code>n1</code></strong> is the size of first sample;
      * <strong><code> n2</code></strong> is the size of second sample;
      * <strong><code> m1</code></strong> is the mean of first sample;
      * <strong><code> m2</code></strong> is the mean of second sample</li>
      * </ul>
      * and <strong><code>var</code></strong> is the pooled variance estimate:
      * </p><p>
      * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
      * </p><p>
      * with <strong><code>var1<code></strong> the variance of the first sample and
      * <strong><code>var2</code></strong> the variance of the second sample.
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array lengths must both be at least 2.
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @return t statistic
      * @throws IllegalArgumentException if the precondition is not met
      */
     public double homoscedasticT(double[] sample1, double[] sample2)
     throws IllegalArgumentException {
         checkSampleData(sample1);
         checkSampleData(sample2);
         return homoscedasticT(StatUtils.mean(sample1), StatUtils.mean(sample2),
                 StatUtils.variance(sample1), StatUtils.variance(sample2),
                 sample1.length, sample2.length);
     }

     /**
      * Computes a 2-sample t statistic, without the hypothesis of equal
      * subpopulation variances.  To compute a t-statistic assuming equal
      * variances, use {@link #homoscedasticT(double[], double[])}.
      * <p>
      * This statistic can be used to perform a two-sample t-test to compare
      * sample means.</p>
      * <p>
      * The t-statisitc is</p>
      * <p>
      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
      * </p><p>
      *  where <strong><code>n1</code></strong> is the size of the first sample
      * <strong><code> n2</code></strong> is the size of the second sample;
      * <strong><code> m1</code></strong> is the mean of the first sample;
      * <strong><code> m2</code></strong> is the mean of the second sample;
      * <strong><code> var1</code></strong> is the variance of the first sample;
      * <strong><code> var2</code></strong> is the variance of the second sample;
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array lengths must both be at least 2.
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @return t statistic
      * @throws IllegalArgumentException if the precondition is not met
      */
     public double t(double[] sample1, double[] sample2)
     throws IllegalArgumentException {
         checkSampleData(sample1);
         checkSampleData(sample2);
         return t(StatUtils.mean(sample1), StatUtils.mean(sample2),
                 StatUtils.variance(sample1), StatUtils.variance(sample2),
                 sample1.length, sample2.length);
     }

     /**
      * Computes a 2-sample t statistic </a>, comparing the means of the datasets
      * described by two {@link StatisticalSummary} instances, without the
      * assumption of equal subpopulation variances.  Use
      * {@link #homoscedasticT(StatisticalSummary, StatisticalSummary)} to
      * compute a t-statistic under the equal variances assumption.
      * <p>
      * This statistic can be used to perform a two-sample t-test to compare
      * sample means.</p>
      * <p>
       * The returned  t-statisitc is</p>
      * <p>
      * &nbsp;&nbsp; <code>  t = (m1 - m2) / sqrt(var1/n1 + var2/n2)</code>
      * </p><p>
      * where <strong><code>n1</code></strong> is the size of the first sample;
      * <strong><code> n2</code></strong> is the size of the second sample;
      * <strong><code> m1</code></strong> is the mean of the first sample;
      * <strong><code> m2</code></strong> is the mean of the second sample
      * <strong><code> var1</code></strong> is the variance of the first sample;
      * <strong><code> var2</code></strong> is the variance of the second sample
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The datasets described by the two Univariates must each contain
      * at least 2 observations.
      * </li></ul></p>
      *
      * @param sampleStats1 StatisticalSummary describing data from the first sample
      * @param sampleStats2 StatisticalSummary describing data from the second sample
      * @return t statistic
      * @throws IllegalArgumentException if the precondition is not met
      */
     public double t(StatisticalSummary sampleStats1,
                     StatisticalSummary sampleStats2)
     throws IllegalArgumentException {
         checkSampleData(sampleStats1);
         checkSampleData(sampleStats2);
         return t(sampleStats1.getMean(), sampleStats2.getMean(),
                 sampleStats1.getVariance(), sampleStats2.getVariance(),
                 sampleStats1.getN(), sampleStats2.getN());
     }

     /**
      * Computes a 2-sample t statistic, comparing the means of the datasets
      * described by two {@link StatisticalSummary} instances, under the
      * assumption of equal subpopulation variances.  To compute a t-statistic
      * without the equal variances assumption, use
      * {@link #t(StatisticalSummary, StatisticalSummary)}.
      * <p>
      * This statistic can be used to perform a (homoscedastic) two-sample
      * t-test to compare sample means.</p>
      * <p>
      * The t-statisitc returned is</p>
      * <p>
      * &nbsp;&nbsp;<code>  t = (m1 - m2) / (sqrt(1/n1 +1/n2) sqrt(var))</code>
      * </p><p>
      * where <strong><code>n1</code></strong> is the size of first sample;
      * <strong><code> n2</code></strong> is the size of second sample;
      * <strong><code> m1</code></strong> is the mean of first sample;
      * <strong><code> m2</code></strong> is the mean of second sample
      * and <strong><code>var</code></strong> is the pooled variance estimate:
      * </p><p>
      * <code>var = sqrt(((n1 - 1)var1 + (n2 - 1)var2) / ((n1-1) + (n2-1)))</code>
      * <p>
      * with <strong><code>var1<code></strong> the variance of the first sample and
      * <strong><code>var2</code></strong> the variance of the second sample.
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The datasets described by the two Univariates must each contain
      * at least 2 observations.
      * </li></ul></p>
      *
      * @param sampleStats1 StatisticalSummary describing data from the first sample
      * @param sampleStats2 StatisticalSummary describing data from the second sample
      * @return t statistic
      * @throws IllegalArgumentException if the precondition is not met
      */
     public double homoscedasticT(StatisticalSummary sampleStats1,
             StatisticalSummary sampleStats2)
     throws IllegalArgumentException {
         checkSampleData(sampleStats1);
         checkSampleData(sampleStats2);
         return homoscedasticT(sampleStats1.getMean(), sampleStats2.getMean(),
                 sampleStats1.getVariance(), sampleStats2.getVariance(),
                 sampleStats1.getN(), sampleStats2.getN());
     }

      /**
      * Returns the <i>observed significance level</i>, or
      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
      * comparing the mean of the input array with the constant <code>mu</code>.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the mean equals
      * <code>mu</code> in favor of the two-sided alternative that the mean
      * is different from <code>mu</code>. For a one-sided test, divide the
      * returned value by 2.</p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array length must be at least 2.
      * </li></ul></p>
      *
      * @param mu constant value to compare sample mean against
      * @param sample array of sample data values
      * @return p-value
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double tTest(double mu, double[] sample)
     throws IllegalArgumentException, MathException {
         checkSampleData(sample);
         return tTest( StatUtils.mean(sample), mu, StatUtils.variance(sample),
                 sample.length);
     }

     /**
      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
      * two-sided t-test</a> evaluating the null hypothesis that the mean of the population from
      * which <code>sample</code> is drawn equals <code>mu</code>.
      * <p>
      * Returns <code>true</code> iff the null hypothesis can be
      * rejected with confidence <code>1 - alpha</code>.  To
      * perform a 1-sided test, use <code>alpha * 2</code>
      * </p><p>
      * <strong>Examples:</strong><br><ol>
      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
      * the 95% level, use <br><code>tTest(mu, sample, 0.05) </code>
      * </li>
      * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
      * at the 99% level, first verify that the measured sample mean is less
      * than <code>mu</code> and then use
      * <br><code>tTest(mu, sample, 0.02) </code>
      * </li></ol></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the one-sample
      * parametric t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array length must be at least 2.
      * </li></ul></p>
      *
      * @param mu constant value to compare sample mean against
      * @param sample array of sample data values
      * @param alpha significance level of the test
      * @return p-value
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error computing the p-value
      */
     public boolean tTest(double mu, double[] sample, double alpha)
     throws IllegalArgumentException, MathException {
         checkSignificanceLevel(alpha);
         return tTest(mu, sample) < alpha;
     }

     /**
      * Returns the <i>observed significance level</i>, or
      * <i>p-value</i>, associated with a one-sample, two-tailed t-test
      * comparing the mean of the dataset described by <code>sampleStats</code>
      * with the constant <code>mu</code>.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the mean equals
      * <code>mu</code> in favor of the two-sided alternative that the mean
      * is different from <code>mu</code>. For a one-sided test, divide the
      * returned value by 2.</p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The sample must contain at least 2 observations.
      * </li></ul></p>
      *
      * @param mu constant value to compare sample mean against
      * @param sampleStats StatisticalSummary describing sample data
      * @return p-value
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double tTest(double mu, StatisticalSummary sampleStats)
     throws IllegalArgumentException, MathException {
         checkSampleData(sampleStats);
         return tTest(sampleStats.getMean(), mu, sampleStats.getVariance(),
                 sampleStats.getN());
     }

      /**
      * Performs a <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
      * two-sided t-test</a> evaluating the null hypothesis that the mean of the
      * population from which the dataset described by <code>stats</code> is
      * drawn equals <code>mu</code>.
      * <p>
      * Returns <code>true</code> iff the null hypothesis can be rejected with
      * confidence <code>1 - alpha</code>.  To  perform a 1-sided test, use
      * <code>alpha * 2.</code></p>
      * <p>
      * <strong>Examples:</strong><br><ol>
      * <li>To test the (2-sided) hypothesis <code>sample mean = mu </code> at
      * the 95% level, use <br><code>tTest(mu, sampleStats, 0.05) </code>
      * </li>
      * <li>To test the (one-sided) hypothesis <code> sample mean < mu </code>
      * at the 99% level, first verify that the measured sample mean is less
      * than <code>mu</code> and then use
      * <br><code>tTest(mu, sampleStats, 0.02) </code>
      * </li></ol></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the one-sample
      * parametric t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/sg_glos.html#one-sample">here</a>
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The sample must include at least 2 observations.
      * </li></ul></p>
      *
      * @param mu constant value to compare sample mean against
      * @param sampleStats StatisticalSummary describing sample data values
      * @param alpha significance level of the test
      * @return p-value
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public boolean tTest( double mu, StatisticalSummary sampleStats,
             double alpha)
     throws IllegalArgumentException, MathException {
         checkSignificanceLevel(alpha);
         return tTest(mu, sampleStats) < alpha;
     }

     /**
      * Returns the <i>observed significance level</i>, or
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
      * comparing the means of the input arrays.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the two means are
      * equal in favor of the two-sided alternative that they are different.
      * For a one-sided test, divide the returned value by 2.</p>
      * <p>
      * The test does not assume that the underlying popuation variances are
      * equal  and it uses approximated degrees of freedom computed from the
      * sample data to compute the p-value.  The t-statistic used is as defined in
      * {@link #t(double[], double[])} and the Welch-Satterthwaite approximation
      * to the degrees of freedom is used,
      * as described
      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
      * here.</a>  To perform the test under the assumption of equal subpopulation
      * variances, use {@link #homoscedasticTTest(double[], double[])}.</p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the p-value depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array lengths must both be at least 2.
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @return p-value for t-test
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double tTest(double[] sample1, double[] sample2)
     throws IllegalArgumentException, MathException {
         checkSampleData(sample1);
         checkSampleData(sample2);
         return tTest(StatUtils.mean(sample1), StatUtils.mean(sample2),
                 StatUtils.variance(sample1), StatUtils.variance(sample2),
                 sample1.length, sample2.length);
     }

     /**
      * Returns the <i>observed significance level</i>, or
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
      * comparing the means of the input arrays, under the assumption that
      * the two samples are drawn from subpopulations with equal variances.
      * To perform the test without the equal variances assumption, use
      * {@link #tTest(double[], double[])}.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the two means are
      * equal in favor of the two-sided alternative that they are different.
      * For a one-sided test, divide the returned value by 2.</p>
      * <p>
      * A pooled variance estimate is used to compute the t-statistic.  See
      * {@link #homoscedasticT(double[], double[])}. The sum of the sample sizes
      * minus 2 is used as the degrees of freedom.</p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the p-value depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array lengths must both be at least 2.
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @return p-value for t-test
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double homoscedasticTTest(double[] sample1, double[] sample2)
     throws IllegalArgumentException, MathException {
         checkSampleData(sample1);
         checkSampleData(sample2);
         return homoscedasticTTest(StatUtils.mean(sample1),
                 StatUtils.mean(sample2), StatUtils.variance(sample1),
                 StatUtils.variance(sample2), sample1.length,
                 sample2.length);
     }


      /**
      * Performs a
      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
      * and <code>sample2</code> are drawn from populations with the same mean,
      * with significance level <code>alpha</code>.  This test does not assume
      * that the subpopulation variances are equal.  To perform the test assuming
      * equal variances, use
      * {@link #homoscedasticTTest(double[], double[], double)}.
      * <p>
      * Returns <code>true</code> iff the null hypothesis that the means are
      * equal can be rejected with confidence <code>1 - alpha</code>.  To
      * perform a 1-sided test, use <code>alpha / 2</code></p>
      * <p>
      * See {@link #t(double[], double[])} for the formula used to compute the
      * t-statistic.  Degrees of freedom are approximated using the
      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
      * Welch-Satterthwaite approximation.</a></p>

      * <p>
      * <strong>Examples:</strong><br><ol>
      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
      * the 95% level,  use
      * <br><code>tTest(sample1, sample2, 0.05). </code>
      * </li>
      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code> at
      * the 99% level, first verify that the measured  mean of <code>sample 1</code>
      * is less than the mean of <code>sample 2</code> and then use
      * <br><code>tTest(sample1, sample2, 0.02) </code>
      * </li></ol></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array lengths must both be at least 2.
      * </li>
      * <li> <code> 0 < alpha < 0.5 </code>
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @param alpha significance level of the test
      * @return true if the null hypothesis can be rejected with
      * confidence 1 - alpha
      * @throws IllegalArgumentException if the preconditions are not met
      * @throws MathException if an error occurs performing the test
      */
     public boolean tTest(double[] sample1, double[] sample2,
             double alpha)
     throws IllegalArgumentException, MathException {
         checkSignificanceLevel(alpha);
         return tTest(sample1, sample2) < alpha;
     }

     /**
      * Performs a
      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
      * two-sided t-test</a> evaluating the null hypothesis that <code>sample1</code>
      * and <code>sample2</code> are drawn from populations with the same mean,
      * with significance level <code>alpha</code>,  assuming that the
      * subpopulation variances are equal.  Use
      * {@link #tTest(double[], double[], double)} to perform the test without
      * the assumption of equal variances.
      * <p>
      * Returns <code>true</code> iff the null hypothesis that the means are
      * equal can be rejected with confidence <code>1 - alpha</code>.  To
      * perform a 1-sided test, use <code>alpha * 2.</code>  To perform the test
      * without the assumption of equal subpopulation variances, use
      * {@link #tTest(double[], double[], double)}.</p>
      * <p>
      * A pooled variance estimate is used to compute the t-statistic. See
      * {@link #t(double[], double[])} for the formula. The sum of the sample
      * sizes minus 2 is used as the degrees of freedom.</p>
      * <p>
      * <strong>Examples:</strong><br><ol>
      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
      * the 95% level, use <br><code>tTest(sample1, sample2, 0.05). </code>
      * </li>
      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2, </code>
      * at the 99% level, first verify that the measured mean of
      * <code>sample 1</code> is less than the mean of <code>sample 2</code>
      * and then use
      * <br><code>tTest(sample1, sample2, 0.02) </code>
      * </li></ol></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The observed array lengths must both be at least 2.
      * </li>
      * <li> <code> 0 < alpha < 0.5 </code>
      * </li></ul></p>
      *
      * @param sample1 array of sample data values
      * @param sample2 array of sample data values
      * @param alpha significance level of the test
      * @return true if the null hypothesis can be rejected with
      * confidence 1 - alpha
      * @throws IllegalArgumentException if the preconditions are not met
      * @throws MathException if an error occurs performing the test
      */
     public boolean homoscedasticTTest(double[] sample1, double[] sample2,
             double alpha)
     throws IllegalArgumentException, MathException {
         checkSignificanceLevel(alpha);
         return homoscedasticTTest(sample1, sample2) < alpha;
     }

      /**
      * Returns the <i>observed significance level</i>, or
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
      * comparing the means of the datasets described by two StatisticalSummary
      * instances.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the two means are
      * equal in favor of the two-sided alternative that they are different.
      * For a one-sided test, divide the returned value by 2.</p>
      * <p>
      * The test does not assume that the underlying popuation variances are
      * equal  and it uses approximated degrees of freedom computed from the
      * sample data to compute the p-value.   To perform the test assuming
      * equal variances, use
      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.</p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the p-value depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The datasets described by the two Univariates must each contain
      * at least 2 observations.
      * </li></ul></p>
      *
      * @param sampleStats1  StatisticalSummary describing data from the first sample
      * @param sampleStats2  StatisticalSummary describing data from the second sample
      * @return p-value for t-test
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double tTest(StatisticalSummary sampleStats1, StatisticalSummary sampleStats2)
     throws IllegalArgumentException, MathException {
         checkSampleData(sampleStats1);
         checkSampleData(sampleStats2);
         return tTest(sampleStats1.getMean(), sampleStats2.getMean(), sampleStats1.getVariance(),
                 sampleStats2.getVariance(), sampleStats1.getN(),
                 sampleStats2.getN());
     }

     /**
      * Returns the <i>observed significance level</i>, or
      * <i>p-value</i>, associated with a two-sample, two-tailed t-test
      * comparing the means of the datasets described by two StatisticalSummary
      * instances, under the hypothesis of equal subpopulation variances. To
      * perform a test without the equal variances assumption, use
      * {@link #tTest(StatisticalSummary, StatisticalSummary)}.
      * <p>
      * The number returned is the smallest significance level
      * at which one can reject the null hypothesis that the two means are
      * equal in favor of the two-sided alternative that they are different.
      * For a one-sided test, divide the returned value by 2.</p>
      * <p>
      * See {@link #homoscedasticT(double[], double[])} for the formula used to
      * compute the t-statistic. The sum of the  sample sizes minus 2 is used as
      * the degrees of freedom.</p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the p-value depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">here</a>
      * </p><p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The datasets described by the two Univariates must each contain
      * at least 2 observations.
      * </li></ul></p>
      *
      * @param sampleStats1  StatisticalSummary describing data from the first sample
      * @param sampleStats2  StatisticalSummary describing data from the second sample
      * @return p-value for t-test
      * @throws IllegalArgumentException if the precondition is not met
      * @throws MathException if an error occurs computing the p-value
      */
     public double homoscedasticTTest(StatisticalSummary sampleStats1,
                                      StatisticalSummary sampleStats2)
     throws IllegalArgumentException, MathException {
         checkSampleData(sampleStats1);
         checkSampleData(sampleStats2);
         return homoscedasticTTest(sampleStats1.getMean(),
                 sampleStats2.getMean(), sampleStats1.getVariance(),
                 sampleStats2.getVariance(), sampleStats1.getN(),
                 sampleStats2.getN());
     }

     /**
      * Performs a
      * <a href="http://www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm">
      * two-sided t-test</a> evaluating the null hypothesis that
      * <code>sampleStats1</code> and <code>sampleStats2</code> describe
      * datasets drawn from populations with the same mean, with significance
      * level <code>alpha</code>.   This test does not assume that the
      * subpopulation variances are equal.  To perform the test under the equal
      * variances assumption, use
      * {@link #homoscedasticTTest(StatisticalSummary, StatisticalSummary)}.
      * <p>
      * Returns <code>true</code> iff the null hypothesis that the means are
      * equal can be rejected with confidence <code>1 - alpha</code>.  To
      * perform a 1-sided test, use <code>alpha * 2</code></p>
      * <p>
      * See {@link #t(double[], double[])} for the formula used to compute the
      * t-statistic.  Degrees of freedom are approximated using the
      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section3/prc31.htm">
      * Welch-Satterthwaite approximation.</a></p>
      * <p>
      * <strong>Examples:</strong><br><ol>
      * <li>To test the (2-sided) hypothesis <code>mean 1 = mean 2 </code> at
      * the 95%, use
      * <br><code>tTest(sampleStats1, sampleStats2, 0.05) </code>
      * </li>
      * <li>To test the (one-sided) hypothesis <code> mean 1 < mean 2 </code>
      * at the 99% level,  first verify that the measured mean of
      * <code>sample 1</code> is less than  the mean of <code>sample 2</code>
      * and then use
      * <br><code>tTest(sampleStats1, sampleStats2, 0.02) </code>
      * </li></ol></p>
      * <p>
      * <strong>Usage Note:</strong><br>
      * The validity of the test depends on the assumptions of the parametric
      * t-test procedure, as discussed
      * <a href="http://www.basic.nwu.edu/statguidefiles/ttest_unpaired_ass_viol.html">
      * here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>The datasets described by the two Univariates must each contain
      * at least 2 observations.
      * </li>
      * <li> <code> 0 < alpha < 0.5 </code>
      * </li></ul></p>
      *
      * @param sampleStats1 StatisticalSummary describing sample data values
      * @param sampleStats2 StatisticalSummary describing sample data values
      * @param alpha significance level of the test
      * @return true if the null hypothesis can be rejected with
      * confidence 1 - alpha
      * @throws IllegalArgumentException if the preconditions are not met
      * @throws MathException if an error occurs performing the test
      */
     public boolean tTest(StatisticalSummary sampleStats1,
             StatisticalSummary sampleStats2, double alpha)
     throws IllegalArgumentException, MathException {
         checkSignificanceLevel(alpha);
         return tTest(sampleStats1, sampleStats2) < alpha;
     }

     //----------------------------------------------- Protected methods

     /**
      * Computes approximate degrees of freedom for 2-sample t-test.
      *
      * @param v1 first sample variance
      * @param v2 second sample variance
      * @param n1 first sample n
      * @param n2 second sample n
      * @return approximate degrees of freedom
      */
     protected double df(double v1, double v2, double n1, double n2) {
         return (((v1 / n1) + (v2 / n2)) * ((v1 / n1) + (v2 / n2))) /
         ((v1 * v1) / (n1 * n1 * (n1 - 1d)) + (v2 * v2) /
                 (n2 * n2 * (n2 - 1d)));
     }

     /**
      * Computes t test statistic for 1-sample t-test.
      *
      * @param m sample mean
      * @param mu constant to test against
      * @param v sample variance
      * @param n sample n
      * @return t test statistic
      */
     protected double t(double m, double mu, double v, double n) {
         return (m - mu) / FastMath.sqrt(v / n);
     }

     /**
      * Computes t test statistic for 2-sample t-test.
      * <p>
      * Does not assume that subpopulation variances are equal.</p>
      *
      * @param m1 first sample mean
      * @param m2 second sample mean
      * @param v1 first sample variance
      * @param v2 second sample variance
      * @param n1 first sample n
      * @param n2 second sample n
      * @return t test statistic
      */
     protected double t(double m1, double m2,  double v1, double v2, double n1,
             double n2)  {
             return (m1 - m2) / FastMath.sqrt((v1 / n1) + (v2 / n2));
     }

     /**
      * Computes t test statistic for 2-sample t-test under the hypothesis
      * of equal subpopulation variances.
      *
      * @param m1 first sample mean
      * @param m2 second sample mean
      * @param v1 first sample variance
      * @param v2 second sample variance
      * @param n1 first sample n
      * @param n2 second sample n
      * @return t test statistic
      */
     protected double homoscedasticT(double m1, double m2,  double v1,
             double v2, double n1, double n2)  {
             double pooledVariance = ((n1  - 1) * v1 + (n2 -1) * v2 ) / (n1 + n2 - 2);
             return (m1 - m2) / FastMath.sqrt(pooledVariance * (1d / n1 + 1d / n2));
     }

     /**
      * Computes p-value for 2-sided, 1-sample t-test.
      *
      * @param m sample mean
      * @param mu constant to test against
      * @param v sample variance
      * @param n sample n
      * @return p-value
      * @throws MathException if an error occurs computing the p-value
      */
     protected double tTest(double m, double mu, double v, double n)
     throws MathException {
         double t = FastMath.abs(t(m, mu, v, n));
         distribution.setDegreesOfFreedom(n - 1);
         return 2.0 * distribution.cumulativeProbability(-t);
     }

     /**
      * Computes p-value for 2-sided, 2-sample t-test.
      * <p>
      * Does not assume subpopulation variances are equal. Degrees of freedom
      * are estimated from the data.</p>
      *
      * @param m1 first sample mean
      * @param m2 second sample mean
      * @param v1 first sample variance
      * @param v2 second sample variance
      * @param n1 first sample n
      * @param n2 second sample n
      * @return p-value
      * @throws MathException if an error occurs computing the p-value
      */
     protected double tTest(double m1, double m2, double v1, double v2,
             double n1, double n2)
     throws MathException {
         double t = FastMath.abs(t(m1, m2, v1, v2, n1, n2));
         double degreesOfFreedom = 0;
         degreesOfFreedom = df(v1, v2, n1, n2);
         distribution.setDegreesOfFreedom(degreesOfFreedom);
         return 2.0 * distribution.cumulativeProbability(-t);
     }

     /**
      * Computes p-value for 2-sided, 2-sample t-test, under the assumption
      * of equal subpopulation variances.
      * <p>
      * The sum of the sample sizes minus 2 is used as degrees of freedom.</p>
      *
      * @param m1 first sample mean
      * @param m2 second sample mean
      * @param v1 first sample variance
      * @param v2 second sample variance
      * @param n1 first sample n
      * @param n2 second sample n
      * @return p-value
      * @throws MathException if an error occurs computing the p-value
      */
     protected double homoscedasticTTest(double m1, double m2, double v1,
             double v2, double n1, double n2)
     throws MathException {
         double t = FastMath.abs(homoscedasticT(m1, m2, v1, v2, n1, n2));
         double degreesOfFreedom = n1 + n2 - 2;
         distribution.setDegreesOfFreedom(degreesOfFreedom);
         return 2.0 * distribution.cumulativeProbability(-t);
     }

     /**
      * Modify the distribution used to compute inference statistics.
      * @param value the new distribution
      * @since 1.2
      * @deprecated in 2.2 (to be removed in 3.0).
      */
     @Deprecated
     public void setDistribution(TDistribution value) {
         distribution = value;
     }

     /** Check significance level.
      * @param alpha significance level
      * @exception IllegalArgumentException if significance level is out of bounds
      */
     private void checkSignificanceLevel(final double alpha)
         throws IllegalArgumentException {
         if ((alpha <= 0) || (alpha > 0.5)) {
             throw MathRuntimeException.createIllegalArgumentException(
                   LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
                   alpha, 0.0, 0.5);
         }
     }

     /** Check sample data.
      * @param data sample data
      * @exception IllegalArgumentException if there is not enough sample data
      */
     private void checkSampleData(final double[] data)
         throws IllegalArgumentException {
         if ((data == null) || (data.length < 2)) {
             throw MathRuntimeException.createIllegalArgumentException(
                   LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
                   (data == null) ? 0 : data.length);
         }
     }

     /** Check sample data.
      * @param stat statistical summary
      * @exception IllegalArgumentException if there is not enough sample data
      */
     private void checkSampleData(final StatisticalSummary stat)
         throws IllegalArgumentException {
         if ((stat == null) || (stat.getN() < 2)) {
             throw MathRuntimeException.createIllegalArgumentException(
                   LocalizedFormats.INSUFFICIENT_DATA_FOR_T_STATISTIC,
                   (stat == null) ? 0 : stat.getN());
         }
     }

 }