View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.descriptive;
23  
24  import java.io.Serializable;
25  import java.util.Arrays;
26  import java.util.function.DoubleConsumer;
27  
28  import org.hipparchus.exception.LocalizedCoreFormats;
29  import org.hipparchus.exception.MathIllegalArgumentException;
30  import org.hipparchus.exception.MathIllegalStateException;
31  import org.hipparchus.stat.descriptive.moment.GeometricMean;
32  import org.hipparchus.stat.descriptive.moment.Kurtosis;
33  import org.hipparchus.stat.descriptive.moment.Mean;
34  import org.hipparchus.stat.descriptive.moment.Skewness;
35  import org.hipparchus.stat.descriptive.moment.Variance;
36  import org.hipparchus.stat.descriptive.rank.Max;
37  import org.hipparchus.stat.descriptive.rank.Min;
38  import org.hipparchus.stat.descriptive.rank.Percentile;
39  import org.hipparchus.stat.descriptive.summary.Sum;
40  import org.hipparchus.stat.descriptive.summary.SumOfSquares;
41  import org.hipparchus.util.FastMath;
42  import org.hipparchus.util.MathUtils;
43  import org.hipparchus.util.ResizableDoubleArray;
44  
45  
46  /**
47   * Maintains a dataset of values of a single variable and computes descriptive
48   * statistics based on stored data.
49   * <p>
50   * The {@link #getWindowSize() windowSize} property sets a limit on the number
51   * of values that can be stored in the dataset. The default value, INFINITE_WINDOW,
52   * puts no limit on the size of the dataset. This value should be used with
53   * caution, as the backing store will grow without bound in this case.
54   * <p>
55   * For very large datasets, {@link StreamingStatistics}, which does not store
56   * the dataset, should be used instead of this class. If <code>windowSize</code>
57   * is not INFINITE_WINDOW and more values are added than can be stored in the
58   * dataset, new values are added in a "rolling" manner, with new values replacing
59   * the "oldest" values in the dataset.
60   * <p>
61   * Note: this class is not threadsafe.
62   */
63  public class DescriptiveStatistics
64      implements StatisticalSummary, DoubleConsumer, Serializable {
65  
66      /**
67       * Represents an infinite window size.  When the {@link #getWindowSize()}
68       * returns this value, there is no limit to the number of data values
69       * that can be stored in the dataset.
70       */
71      protected static final int INFINITE_WINDOW = -1;
72  
73      /** Serialization UID */
74      private static final long serialVersionUID = 20160411L;
75  
76      /** The statistic used to calculate the population variance - fixed. */
77      private static final UnivariateStatistic POPULATION_VARIANCE = new Variance(false);
78  
79      /** Maximum statistic implementation. */
80      private final UnivariateStatistic          maxImpl;
81      /** Minimum statistic implementation. */
82      private final UnivariateStatistic          minImpl;
83      /** Sum statistic implementation. */
84      private final UnivariateStatistic          sumImpl;
85      /** Sum of squares statistic implementation. */
86      private final UnivariateStatistic          sumOfSquaresImpl;
87      /** Mean statistic implementation. */
88      private final UnivariateStatistic          meanImpl;
89      /** Variance statistic implementation. */
90      private final UnivariateStatistic          varianceImpl;
91      /** Geometric mean statistic implementation. */
92      private final UnivariateStatistic          geometricMeanImpl;
93      /** Kurtosis statistic implementation. */
94      private final UnivariateStatistic          kurtosisImpl;
95      /** Skewness statistic implementation. */
96      private final UnivariateStatistic          skewnessImpl;
97      /** Percentile statistic implementation. */
98      private final Percentile                   percentileImpl;
99  
100     /** holds the window size. */
101     private int windowSize;
102 
103     /** Stored data values. */
104     private final ResizableDoubleArray eDA;
105 
106     /**
107      * Construct a DescriptiveStatistics instance with an infinite window.
108      */
109     public DescriptiveStatistics() {
110         this(INFINITE_WINDOW);
111     }
112 
113     /**
114      * Construct a DescriptiveStatistics instance with the specified window.
115      *
116      * @param size the window size.
117      * @throws MathIllegalArgumentException if window size is less than 1 but
118      * not equal to {@link #INFINITE_WINDOW}
119      */
120     public DescriptiveStatistics(int size) throws MathIllegalArgumentException {
121         this(size, false, null);
122     }
123 
124     /**
125      * Construct a DescriptiveStatistics instance with an infinite window
126      * and the initial data values in double[] initialDoubleArray.
127      *
128      * @param initialDoubleArray the initial double[].
129      * @throws org.hipparchus.exception.NullArgumentException if the input array is null
130      */
131     public DescriptiveStatistics(double[] initialDoubleArray) {
132         this(INFINITE_WINDOW, true, initialDoubleArray);
133     }
134 
135     /**
136      * Copy constructor.
137      * <p>
138      * Construct a new DescriptiveStatistics instance that
139      * is a copy of original.
140      *
141      * @param original DescriptiveStatistics instance to copy
142      * @throws org.hipparchus.exception.NullArgumentException if original is null
143      */
144     protected DescriptiveStatistics(DescriptiveStatistics original) {
145         MathUtils.checkNotNull(original);
146 
147         // Copy data and window size
148         this.windowSize = original.windowSize;
149         this.eDA        = original.eDA.copy();
150 
151         // Copy implementations
152         this.maxImpl           = original.maxImpl.copy();
153         this.minImpl           = original.minImpl.copy();
154         this.meanImpl          = original.meanImpl.copy();
155         this.sumImpl           = original.sumImpl.copy();
156         this.sumOfSquaresImpl  = original.sumOfSquaresImpl.copy();
157         this.varianceImpl      = original.varianceImpl.copy();
158         this.geometricMeanImpl = original.geometricMeanImpl.copy();
159         this.kurtosisImpl      = original.kurtosisImpl.copy();
160         this.skewnessImpl      = original.skewnessImpl.copy();
161         this.percentileImpl    = original.percentileImpl.copy();
162     }
163 
164     /**
165      * Construct a DescriptiveStatistics instance with the specified window.
166      *
167      * @param windowSize the window size
168      * @param hasInitialValues if initial values have been provided
169      * @param initialValues the initial values
170      * @throws org.hipparchus.exception.NullArgumentException if initialValues is null
171      * @throws MathIllegalArgumentException if window size is less than 1 but
172      * not equal to {@link #INFINITE_WINDOW}
173      */
174     DescriptiveStatistics(int windowSize, boolean hasInitialValues, double[] initialValues) {
175         if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
176             throw new MathIllegalArgumentException(
177                     LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
178         }
179 
180         if (hasInitialValues) {
181             MathUtils.checkNotNull(initialValues, LocalizedCoreFormats.INPUT_ARRAY);
182         }
183 
184         this.windowSize     = windowSize;
185         int initialCapacity = this.windowSize < 0 ? 100 : this.windowSize;
186         this.eDA            = hasInitialValues ?
187             new ResizableDoubleArray(initialValues) :
188             new ResizableDoubleArray(initialCapacity);
189 
190         maxImpl           = new Max();
191         minImpl           = new Min();
192         sumImpl           = new Sum();
193         sumOfSquaresImpl  = new SumOfSquares();
194         meanImpl          = new Mean();
195         varianceImpl      = new Variance();
196         geometricMeanImpl = new GeometricMean();
197         kurtosisImpl      = new Kurtosis();
198         skewnessImpl      = new Skewness();
199         percentileImpl    = new Percentile();
200     }
201 
202     /**
203      * Returns a copy of this DescriptiveStatistics instance with the same internal state.
204      *
205      * @return a copy of this
206      */
207     public DescriptiveStatistics copy() {
208         return new DescriptiveStatistics(this);
209     }
210 
211     /**
212      * Adds the value to the dataset. If the dataset is at the maximum size
213      * (i.e., the number of stored elements equals the currently configured
214      * windowSize), the first (oldest) element in the dataset is discarded
215      * to make room for the new value.
216      *
217      * @param v the value to be added
218      */
219     public void addValue(double v) {
220         if (windowSize != INFINITE_WINDOW) {
221             if (getN() == windowSize) {
222                 eDA.addElementRolling(v);
223             } else if (getN() < windowSize) {
224                 eDA.addElement(v);
225             }
226         } else {
227             eDA.addElement(v);
228         }
229     }
230 
231     /** {@inheritDoc} */
232     @Override
233     public void accept(double v) {
234         addValue(v);
235     }
236 
237     /**
238      * Resets all statistics and storage.
239      */
240     public void clear() {
241         eDA.clear();
242     }
243 
244     /**
245      * Removes the most recent value from the dataset.
246      *
247      * @throws MathIllegalStateException if there are no elements stored
248      */
249     public void removeMostRecentValue() throws MathIllegalStateException {
250         try {
251             eDA.discardMostRecentElements(1);
252         } catch (MathIllegalArgumentException ex) {
253             throw new MathIllegalStateException(ex, LocalizedCoreFormats.NO_DATA);
254         }
255     }
256 
257     /**
258      * Replaces the most recently stored value with the given value.
259      * There must be at least one element stored to call this method.
260      *
261      * @param v the value to replace the most recent stored value
262      * @return replaced value
263      * @throws MathIllegalStateException if there are no elements stored
264      */
265     public double replaceMostRecentValue(double v) throws MathIllegalStateException {
266         return eDA.substituteMostRecentElement(v);
267     }
268 
269     /**
270      * Apply the given statistic to the data associated with this set of statistics.
271      * @param stat the statistic to apply
272      * @return the computed value of the statistic.
273      */
274     public double apply(UnivariateStatistic stat) {
275         // No try-catch or advertised exception here because arguments
276         // are guaranteed valid.
277         return eDA.compute(stat);
278     }
279 
280     /** {@inheritDoc} */
281     @Override
282     public double getMean() {
283         return apply(meanImpl);
284     }
285 
286     /**
287      * Returns the geometric mean of the available values.
288      * <p>
289      * See {@link GeometricMean} for details on the computing algorithm.
290      *
291      * @see <a href="http://www.xycoon.com/geometric_mean.htm">
292      * Geometric mean</a>
293      *
294      * @return The geometricMean, Double.NaN if no values have been added,
295      * or if any negative values have been added.
296      */
297     public double getGeometricMean() {
298         return apply(geometricMeanImpl);
299     }
300 
301     /**
302      * Returns the standard deviation of the available values.
303      * @return The standard deviation, Double.NaN if no values have been added
304      * or 0.0 for a single value set.
305      */
306     @Override
307     public double getStandardDeviation() {
308         double stdDev = Double.NaN;
309         if (getN() > 0) {
310             if (getN() > 1) {
311                 stdDev = FastMath.sqrt(getVariance());
312             } else {
313                 stdDev = 0.0;
314             }
315         }
316         return stdDev;
317     }
318 
319     /**
320      * Returns the quadratic mean of the available values.
321      *
322      * @see <a href="http://mathworld.wolfram.com/Root-Mean-Square.html">
323      * Root Mean Square</a>
324      *
325      * @return The quadratic mean or {@code Double.NaN} if no values
326      * have been added.
327      */
328     public double getQuadraticMean() {
329         final long n = getN();
330         return n > 0 ? FastMath.sqrt(getSumOfSquares() / n) : Double.NaN;
331     }
332 
333     /** {@inheritDoc} */
334     @Override
335     public double getVariance() {
336         return apply(varianceImpl);
337     }
338 
339     /**
340      * Returns the population variance of the available values.
341      *
342      * @see <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
343      * Population variance</a>
344      *
345      * @return The population variance, Double.NaN if no values have been added,
346      * or 0.0 for a single value set.
347      */
348     public double getPopulationVariance() {
349         return apply(POPULATION_VARIANCE);
350     }
351 
352     /**
353      * Returns the skewness of the available values. Skewness is a
354      * measure of the asymmetry of a given distribution.
355      *
356      * @return The skewness, Double.NaN if less than 3 values have been added.
357      */
358     public double getSkewness() {
359         return apply(skewnessImpl);
360     }
361 
362     /**
363      * Returns the Kurtosis of the available values. Kurtosis is a
364      * measure of the "peakedness" of a distribution.
365      *
366      * @return The kurtosis, Double.NaN if less than 4 values have been added.
367      */
368     public double getKurtosis() {
369         return apply(kurtosisImpl);
370     }
371 
372     /** {@inheritDoc} */
373     @Override
374     public double getMax() {
375         return apply(maxImpl);
376     }
377 
378     /** {@inheritDoc} */
379     @Override
380     public double getMin() {
381         return apply(minImpl);
382     }
383 
384     /** {@inheritDoc} */
385     @Override
386     public double getSum() {
387         return apply(sumImpl);
388     }
389 
390     /**
391      * Returns the sum of the squares of the available values.
392      * @return The sum of the squares or Double.NaN if no
393      * values have been added.
394      */
395     public double getSumOfSquares() {
396         return apply(sumOfSquaresImpl);
397     }
398 
399     /**
400      * Returns an estimate for the pth percentile of the stored values.
401      * <p>
402      * The implementation provided here follows the first estimation procedure presented
403      * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
404      * </p><p>
405      * <strong>Preconditions</strong>:<ul>
406      * <li><code>0 &lt; p &le; 100</code> (otherwise an
407      * <code>MathIllegalArgumentException</code> is thrown)</li>
408      * <li>at least one value must be stored (returns <code>Double.NaN
409      *     </code> otherwise)</li>
410      * </ul>
411      *
412      * @param p the requested percentile (scaled from 0 - 100)
413      * @return An estimate for the pth percentile of the stored data
414      * @throws MathIllegalArgumentException if p is not a valid quantile
415      */
416     public double getPercentile(final double p)
417         throws MathIllegalArgumentException {
418 
419         percentileImpl.setQuantile(p);
420         return apply(percentileImpl);
421     }
422 
423     /** {@inheritDoc} */
424     @Override
425     public long getN() {
426         return eDA.getNumElements();
427     }
428 
429     /**
430      * Returns the maximum number of values that can be stored in the
431      * dataset, or INFINITE_WINDOW (-1) if there is no limit.
432      *
433      * @return The current window size or -1 if its Infinite.
434      */
435     public int getWindowSize() {
436         return windowSize;
437     }
438 
439     /**
440      * WindowSize controls the number of values that contribute to the
441      * reported statistics.  For example, if windowSize is set to 3 and the
442      * values {1,2,3,4,5} have been added <strong> in that order</strong> then
443      * the <i>available values</i> are {3,4,5} and all reported statistics will
444      * be based on these values. If {@code windowSize} is decreased as a result
445      * of this call and there are more than the new value of elements in the
446      * current dataset, values from the front of the array are discarded to
447      * reduce the dataset to {@code windowSize} elements.
448      *
449      * @param windowSize sets the size of the window.
450      * @throws MathIllegalArgumentException if window size is less than 1 but
451      * not equal to {@link #INFINITE_WINDOW}
452      */
453     public void setWindowSize(int windowSize)
454         throws MathIllegalArgumentException {
455 
456         if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
457             throw new MathIllegalArgumentException(
458                     LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
459         }
460 
461         this.windowSize = windowSize;
462 
463         // We need to check to see if we need to discard elements
464         // from the front of the array.  If the windowSize is less than
465         // the current number of elements.
466         if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
467             eDA.discardFrontElements(eDA.getNumElements() - windowSize);
468         }
469     }
470 
471     /**
472      * Returns the current set of values in an array of double primitives.
473      * The order of addition is preserved.  The returned array is a fresh
474      * copy of the underlying data -- i.e., it is not a reference to the
475      * stored data.
476      *
477      * @return the current set of numbers in the order in which they
478      * were added to this set
479      */
480     public double[] getValues() {
481         return eDA.getElements();
482     }
483 
484     /**
485      * Returns the current set of values in an array of double primitives,
486      * sorted in ascending order.  The returned array is a fresh
487      * copy of the underlying data -- i.e., it is not a reference to the
488      * stored data.
489      * @return returns the current set of
490      * numbers sorted in ascending order
491      */
492     public double[] getSortedValues() {
493         double[] sort = getValues();
494         Arrays.sort(sort);
495         return sort;
496     }
497 
498     /**
499      * Returns the element at the specified index
500      * @param index The Index of the element
501      * @return return the element at the specified index
502      */
503     public double getElement(int index) {
504         return eDA.getElement(index);
505     }
506 
507     /**
508      * Generates a text report displaying univariate statistics from values
509      * that have been added.  Each statistic is displayed on a separate line.
510      *
511      * @return String with line feeds displaying statistics
512      */
513     @Override
514     public String toString() {
515         final StringBuilder outBuffer = new StringBuilder(100);
516         final String endl = "\n";
517         outBuffer.append("DescriptiveStatistics:").append(endl).
518                   append("n: ").append(getN()).append(endl).
519                   append("min: ").append(getMin()).append(endl).
520                   append("max: ").append(getMax()).append(endl).
521                   append("mean: ").append(getMean()).append(endl).
522                   append("std dev: ").append(getStandardDeviation()).append(endl);
523         try {
524             // No catch for MIAE because actual parameter is valid below
525             outBuffer.append("median: ").append(getPercentile(50)).append(endl);
526         } catch (MathIllegalStateException ex) {
527             outBuffer.append("median: unavailable").append(endl);
528         }
529         outBuffer.append("skewness: ").append(getSkewness()).append(endl).
530                   append("kurtosis: ").append(getKurtosis()).append(endl);
531         return outBuffer.toString();
532     }
533 
534 }