View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.descriptive.moment;
23  
24  import java.io.Serializable;
25  
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.exception.NullArgumentException;
28  import org.hipparchus.stat.StatUtils;
29  import org.hipparchus.stat.descriptive.AbstractStorelessUnivariateStatistic;
30  import org.hipparchus.stat.descriptive.AggregatableStatistic;
31  import org.hipparchus.stat.descriptive.WeightedEvaluation;
32  import org.hipparchus.util.MathArrays;
33  import org.hipparchus.util.MathUtils;
34  
35  /**
36   * Computes the variance of the available values.  By default, the unbiased
37   * "sample variance" definitional formula is used:
38   * <p>
39   * variance = sum((x_i - mean)^2) / (n - 1)
40   * <p>
41   * where mean is the {@link Mean} and <code>n</code> is the number
42   * of sample observations.
43   * <p>
44   * The definitional formula does not have good numerical properties, so
45   * this implementation does not compute the statistic using the definitional
46   * formula.
47   * <ul>
48   * <li> The <code>getResult</code> method computes the variance using
49   * updating formulas based on West's algorithm, as described in
50   * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
51   * J. G. Lewis 1979, <i>Communications of the ACM</i>,
52   * vol. 22 no. 9, pp. 526-531.</a></li>
53   * <li> The <code>evaluate</code> methods leverage the fact that they have the
54   * full array of values in memory to execute a two-pass algorithm.
55   * Specifically, these methods use the "corrected two-pass algorithm" from
56   * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
57   * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li>
58   * </ul>
59   * <p>
60   * Note that adding values using <code>increment</code> or
61   * <code>incrementAll</code> and then executing <code>getResult</code> will
62   * sometimes give a different, less accurate, result than executing
63   * <code>evaluate</code> with the full array of values. The former approach
64   * should only be used when the full array of values is not available.
65   * <p>
66   * The "population variance"  ( sum((x_i - mean)^2) / n ) can also
67   * be computed using this statistic.  The <code>isBiasCorrected</code>
68   * property determines whether the "population" or "sample" value is
69   * returned by the <code>evaluate</code> and <code>getResult</code> methods.
70   * To compute population variances, set this property to <code>false.</code>
71   * <p>
72   * <strong>Note that this implementation is not synchronized.</strong> If
73   * multiple threads access an instance of this class concurrently, and at least
74   * one of the threads invokes the <code>increment()</code> or
75   * <code>clear()</code> method, it must be synchronized externally.
76   */
77  public class Variance extends AbstractStorelessUnivariateStatistic
78      implements AggregatableStatistic<Variance>, WeightedEvaluation, Serializable {
79  
80      /** Serializable version identifier */
81      private static final long serialVersionUID = 20150412L;
82  
83      /** SecondMoment is used in incremental calculation of Variance*/
84      protected final SecondMoment moment;
85  
86      /**
87       * Whether or not {@link #increment(double)} should increment
88       * the internal second moment. When a Variance is constructed with an
89       * external SecondMoment as a constructor parameter, this property is
90       * set to false and increments must be applied to the second moment
91       * directly.
92       */
93      protected final boolean incMoment;
94  
95      /**
96       * Whether or not bias correction is applied when computing the
97       * value of the statistic. True means that bias is corrected.  See
98       * {@link Variance} for details on the formula.
99       */
100     private final boolean isBiasCorrected;
101 
102     /**
103      * Constructs a Variance with default (true) <code>isBiasCorrected</code>
104      * property.
105      */
106     public Variance() {
107         this(true);
108     }
109 
110     /**
111      * Constructs a Variance based on an external second moment.
112      * <p>
113      * When this constructor is used, the statistic may only be
114      * incremented via the moment, i.e., {@link #increment(double)}
115      * does nothing; whereas {@code m2.increment(value)} increments
116      * both {@code m2} and the Variance instance constructed from it.
117      *
118      * @param m2 the SecondMoment (Third or Fourth moments work here as well.)
119      */
120     public Variance(final SecondMoment m2) {
121         this(true, m2);
122     }
123 
124     /**
125      * Constructs a Variance with the specified <code>isBiasCorrected</code>
126      * property.
127      *
128      * @param isBiasCorrected  setting for bias correction - true means
129      * bias will be corrected and is equivalent to using the argumentless
130      * constructor
131      */
132     public Variance(boolean isBiasCorrected) {
133         this(new SecondMoment(), true, isBiasCorrected);
134     }
135 
136     /**
137      * Constructs a Variance with the specified <code>isBiasCorrected</code>
138      * property and the supplied external second moment.
139      *
140      * @param isBiasCorrected  setting for bias correction - true means
141      * bias will be corrected
142      * @param m2 the SecondMoment (Third or Fourth moments work
143      * here as well.)
144      */
145     public Variance(boolean isBiasCorrected, SecondMoment m2) {
146         this(m2, false, isBiasCorrected);
147     }
148 
149     /**
150      * Constructs a Variance with the specified parameters.
151      *
152      * @param m2 the SecondMoment (Third or Fourth moments work
153      * here as well.)
154      * @param incMoment if the moment shall be incremented
155      * @param isBiasCorrected  setting for bias correction - true means
156      * bias will be corrected
157      */
158     private Variance(SecondMoment m2, boolean incMoment, boolean isBiasCorrected) {
159         this.moment          = m2;
160         this.incMoment       = incMoment;
161         this.isBiasCorrected = isBiasCorrected;
162     }
163 
164     /**
165      * Copy constructor, creates a new {@code Variance} identical
166      * to the {@code original}.
167      *
168      * @param original the {@code Variance} instance to copy
169      * @throws NullArgumentException if original is null
170      */
171     public Variance(Variance original) throws NullArgumentException {
172         MathUtils.checkNotNull(original);
173         this.moment          = original.moment.copy();
174         this.incMoment       = original.incMoment;
175         this.isBiasCorrected = original.isBiasCorrected;
176     }
177 
178     /**
179      * {@inheritDoc}
180      * <p>If all values are available, it is more accurate to use
181      * {@link #evaluate(double[])} rather than adding values one at a time
182      * using this method and then executing {@link #getResult}, since
183      * <code>evaluate</code> leverages the fact that is has the full
184      * list of values together to execute a two-pass algorithm.
185      * See {@link Variance}.
186      * <p>
187      * Note also that when {@link #Variance(SecondMoment)} is used to
188      * create a Variance, this method does nothing. In that case, the
189      * SecondMoment should be incremented directly.
190      */
191     @Override
192     public void increment(final double d) {
193         if (incMoment) {
194             moment.increment(d);
195         }
196     }
197 
198     /** {@inheritDoc} */
199     @Override
200     public double getResult() {
201         if (moment.n == 0) {
202             return Double.NaN;
203         } else if (moment.n == 1) {
204             return 0d;
205         } else {
206             if (isBiasCorrected) {
207                 return moment.m2 / (moment.n - 1d);
208             } else {
209                 return moment.m2 / (moment.n);
210             }
211         }
212     }
213 
214     /** {@inheritDoc} */
215     @Override
216     public long getN() {
217         return moment.getN();
218     }
219 
220     /** {@inheritDoc} */
221     @Override
222     public void clear() {
223         if (incMoment) {
224             moment.clear();
225         }
226     }
227 
228     /** {@inheritDoc} */
229     @Override
230     public void aggregate(Variance other) {
231         MathUtils.checkNotNull(other);
232         if (incMoment) {
233             this.moment.aggregate(other.moment);
234         }
235     }
236 
237     /**
238      * Returns the variance of the entries in the specified portion of
239      * the input array, or <code>Double.NaN</code> if the designated subarray
240      * is empty.  Note that Double.NaN may also be returned if the input
241      * includes NaN and / or infinite values.
242      * <p>
243      * See {@link Variance} for details on the computing algorithm.</p>
244      * <p>
245      * Returns 0 for a single-value (i.e. length = 1) sample.</p>
246      * <p>
247      * Does not change the internal state of the statistic.</p>
248      * <p>
249      * Throws <code>MathIllegalArgumentException</code> if the array is null.</p>
250      *
251      * @param values the input array
252      * @param begin index of the first array element to include
253      * @param length the number of elements to include
254      * @return the variance of the values or Double.NaN if length = 0
255      * @throws MathIllegalArgumentException if the array is null or the array index
256      *  parameters are not valid
257      */
258     @Override
259     public double evaluate(final double[] values, final int begin, final int length)
260         throws MathIllegalArgumentException {
261 
262         double var = Double.NaN;
263 
264         if (MathArrays.verifyValues(values, begin, length)) {
265             if (length == 1) {
266                 var = 0.0;
267             } else if (length > 1) {
268                 double m = StatUtils.mean(values, begin, length);
269                 var = evaluate(values, m, begin, length);
270             }
271         }
272         return var;
273     }
274 
275     /**
276      * Returns the weighted variance of the entries in the specified portion of
277      * the input array, or <code>Double.NaN</code> if the designated subarray
278      * is empty.
279      * <p>
280      * Uses the formula
281      * <pre>
282      *   &Sigma;(weights[i]*(values[i] - weightedMean)²)/(&Sigma;(weights[i]) - 1)
283      * </pre>
284      * where weightedMean is the weighted mean.
285      * <p>
286      * This formula will not return the same result as the unweighted variance when all
287      * weights are equal, unless all weights are equal to 1. The formula assumes that
288      * weights are to be treated as "expansion values," as will be the case if for example
289      * the weights represent frequency counts. To normalize weights so that the denominator
290      * in the variance computation equals the length of the input vector minus one, use
291      * <pre>
292      *   <code>evaluate(values, MathArrays.normalizeArray(weights, values.length));</code>
293      * </pre>
294      * <p>
295      * Returns 0 for a single-value (i.e. length = 1) sample.
296      * <p>
297      * Throws <code>IllegalArgumentException</code> if any of the following are true:
298      * <ul><li>the values array is null</li>
299      *     <li>the weights array is null</li>
300      *     <li>the weights array does not have the same length as the values array</li>
301      *     <li>the weights array contains one or more infinite values</li>
302      *     <li>the weights array contains one or more NaN values</li>
303      *     <li>the weights array contains negative values</li>
304      *     <li>the start and length arguments do not determine a valid array</li>
305      * </ul>
306      * <p>
307      * Does not change the internal state of the statistic.
308      *
309      * @param values the input array
310      * @param weights the weights array
311      * @param begin index of the first array element to include
312      * @param length the number of elements to include
313      * @return the weighted variance of the values or Double.NaN if length = 0
314      * @throws MathIllegalArgumentException if the parameters are not valid
315      */
316     @Override
317     public double evaluate(final double[] values, final double[] weights,
318                            final int begin, final int length)
319         throws MathIllegalArgumentException {
320 
321         double var = Double.NaN;
322         if (MathArrays.verifyValues(values, weights,begin, length)) {
323             if (length == 1) {
324                 var = 0.0;
325             } else if (length > 1) {
326                 Mean mean = new Mean();
327                 double m = mean.evaluate(values, weights, begin, length);
328                 var = evaluate(values, weights, m, begin, length);
329             }
330         }
331         return var;
332     }
333 
334     /**
335      * Returns the variance of the entries in the specified portion of
336      * the input array, using the precomputed mean value. Returns
337      * <code>Double.NaN</code> if the designated subarray is empty.
338      * <p>
339      * See {@link Variance} for details on the computing algorithm.
340      * <p>
341      * The formula used assumes that the supplied mean value is the arithmetic
342      * mean of the sample data, not a known population parameter.  This method
343      * is supplied only to save computation when the mean has already been
344      * computed.
345      * <p>
346      * Returns 0 for a single-value (i.e. length = 1) sample.
347      * <p>
348      * Does not change the internal state of the statistic.
349      *
350      * @param values the input array
351      * @param mean the precomputed mean value
352      * @param begin index of the first array element to include
353      * @param length the number of elements to include
354      * @return the variance of the values or Double.NaN if length = 0
355      * @throws MathIllegalArgumentException if the array is null or the array index
356      *  parameters are not valid
357      */
358     public double evaluate(final double[] values, final double mean,
359                            final int begin, final int length)
360         throws MathIllegalArgumentException {
361 
362         double var = Double.NaN;
363         if (MathArrays.verifyValues(values, begin, length)) {
364             if (length == 1) {
365                 var = 0.0;
366             } else if (length > 1) {
367                 double accum = 0.0;
368                 double accum2 = 0.0;
369                 for (int i = begin; i < begin + length; i++) {
370                     final double dev = values[i] - mean;
371                     accum += dev * dev;
372                     accum2 += dev;
373                 }
374                 double len = length;
375                 if (isBiasCorrected) {
376                     var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
377                 } else {
378                     var = (accum - (accum2 * accum2 / len)) / len;
379                 }
380             }
381         }
382         return var;
383     }
384 
385     /**
386      * Returns the variance of the entries in the input array, using the
387      * precomputed mean value.  Returns <code>Double.NaN</code> if the array
388      * is empty.
389      * <p>
390      * See {@link Variance} for details on the computing algorithm.
391      * <p>
392      * If <code>isBiasCorrected</code> is <code>true</code> the formula used
393      * assumes that the supplied mean value is the arithmetic mean of the
394      * sample data, not a known population parameter.  If the mean is a known
395      * population parameter, or if the "population" version of the variance is
396      * desired, set <code>isBiasCorrected</code> to <code>false</code> before
397      * invoking this method.
398      * <p>
399      * Returns 0 for a single-value (i.e. length = 1) sample.
400      * <p>
401      * Does not change the internal state of the statistic.
402      *
403      * @param values the input array
404      * @param mean the precomputed mean value
405      * @return the variance of the values or Double.NaN if the array is empty
406      * @throws MathIllegalArgumentException if the array is null
407      */
408     public double evaluate(final double[] values, final double mean)
409         throws MathIllegalArgumentException {
410         return evaluate(values, mean, 0, values.length);
411     }
412 
413     /**
414      * Returns the weighted variance of the entries in the specified portion of
415      * the input array, using the precomputed weighted mean value.  Returns
416      * <code>Double.NaN</code> if the designated subarray is empty.
417      * <p>
418      * Uses the formula
419      * <pre>
420      *   &Sigma;(weights[i]*(values[i] - mean)²)/(&Sigma;(weights[i]) - 1)
421      * </pre>
422      * <p>
423      * The formula used assumes that the supplied mean value is the weighted arithmetic
424      * mean of the sample data, not a known population parameter. This method
425      * is supplied only to save computation when the mean has already been
426      * computed.
427      * <p>
428      * This formula will not return the same result as the unweighted variance when all
429      * weights are equal, unless all weights are equal to 1. The formula assumes that
430      * weights are to be treated as "expansion values," as will be the case if for example
431      * the weights represent frequency counts. To normalize weights so that the denominator
432      * in the variance computation equals the length of the input vector minus one, use
433      * <pre>
434      *   <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean);</code>
435      * </pre>
436      * <p>
437      * Returns 0 for a single-value (i.e. length = 1) sample.
438      * <p>
439      * Throws <code>MathIllegalArgumentException</code> if any of the following are true:
440      * <ul><li>the values array is null</li>
441      *     <li>the weights array is null</li>
442      *     <li>the weights array does not have the same length as the values array</li>
443      *     <li>the weights array contains one or more infinite values</li>
444      *     <li>the weights array contains one or more NaN values</li>
445      *     <li>the weights array contains negative values</li>
446      *     <li>the start and length arguments do not determine a valid array</li>
447      * </ul>
448      * <p>
449      * Does not change the internal state of the statistic.
450      *
451      * @param values the input array
452      * @param weights the weights array
453      * @param mean the precomputed weighted mean value
454      * @param begin index of the first array element to include
455      * @param length the number of elements to include
456      * @return the variance of the values or Double.NaN if length = 0
457      * @throws MathIllegalArgumentException if the parameters are not valid
458      */
459     public double evaluate(final double[] values, final double[] weights,
460                            final double mean, final int begin, final int length)
461         throws MathIllegalArgumentException {
462 
463         double var = Double.NaN;
464 
465         if (MathArrays.verifyValues(values, weights, begin, length)) {
466             if (length == 1) {
467                 var = 0.0;
468             } else if (length > 1) {
469                 double accum = 0.0;
470                 double accum2 = 0.0;
471                 for (int i = begin; i < begin + length; i++) {
472                     final double dev = values[i] - mean;
473                     accum += weights[i] * (dev * dev);
474                     accum2 += weights[i] * dev;
475                 }
476 
477                 double sumWts = 0;
478                 for (int i = begin; i < begin + length; i++) {
479                     sumWts += weights[i];
480                 }
481 
482                 if (isBiasCorrected) {
483                     var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0);
484                 } else {
485                     var = (accum - (accum2 * accum2 / sumWts)) / sumWts;
486                 }
487             }
488         }
489         return var;
490     }
491 
492     /**
493      * Returns the weighted variance of the values in the input array, using
494      * the precomputed weighted mean value.
495      * <p>
496      * Uses the formula
497      * <pre>
498      *   &Sigma;(weights[i]*(values[i] - mean)²)/(&Sigma;(weights[i]) - 1)
499      * </pre>
500      * <p>
501      * The formula used assumes that the supplied mean value is the weighted arithmetic
502      * mean of the sample data, not a known population parameter. This method
503      * is supplied only to save computation when the mean has already been
504      * computed.
505      * <p>
506      * This formula will not return the same result as the unweighted variance when all
507      * weights are equal, unless all weights are equal to 1. The formula assumes that
508      * weights are to be treated as "expansion values," as will be the case if for example
509      * the weights represent frequency counts. To normalize weights so that the denominator
510      * in the variance computation equals the length of the input vector minus one, use
511      * <pre>
512      *   <code>evaluate(values, MathArrays.normalizeArray(weights, values.length), mean);</code>
513      * </pre>
514      * <p>
515      * Returns 0 for a single-value (i.e. length = 1) sample.
516      * <p>
517      * Throws <code>MathIllegalArgumentException</code> if any of the following are true:
518      * <ul><li>the values array is null</li>
519      *     <li>the weights array is null</li>
520      *     <li>the weights array does not have the same length as the values array</li>
521      *     <li>the weights array contains one or more infinite values</li>
522      *     <li>the weights array contains one or more NaN values</li>
523      *     <li>the weights array contains negative values</li>
524      * </ul>
525      * <p>
526      * Does not change the internal state of the statistic.
527      *
528      * @param values the input array
529      * @param weights the weights array
530      * @param mean the precomputed weighted mean value
531      * @return the variance of the values or Double.NaN if length = 0
532      * @throws MathIllegalArgumentException if the parameters are not valid
533      */
534     public double evaluate(final double[] values, final double[] weights, final double mean)
535         throws MathIllegalArgumentException {
536         return evaluate(values, weights, mean, 0, values.length);
537     }
538 
539     /** Check if bias is corrected.
540      * @return Returns the isBiasCorrected.
541      */
542     public boolean isBiasCorrected() {
543         return isBiasCorrected;
544     }
545 
546     /**
547      * Returns a new copy of this variance with the given bias correction
548      * setting.
549      *
550      * @param biasCorrection The bias correction flag to set.
551      * @return a copy of this instance with the given bias correction setting
552      */
553     public Variance withBiasCorrection(boolean biasCorrection) {
554         return new Variance(this.moment, this.incMoment, biasCorrection);
555     }
556 
557     /** {@inheritDoc} */
558     @Override
559     public Variance copy() {
560         return new Variance(this);
561     }
562 
563 }