1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /*
19 * This is not the original file distributed by the Apache Software Foundation
20 * It has been modified by the Hipparchus project
21 */
22 package org.hipparchus.stat.descriptive;
23
24 import java.io.Serializable;
25 import java.util.Arrays;
26 import java.util.function.DoubleConsumer;
27
28 import org.hipparchus.exception.LocalizedCoreFormats;
29 import org.hipparchus.exception.MathIllegalArgumentException;
30 import org.hipparchus.exception.MathIllegalStateException;
31 import org.hipparchus.stat.descriptive.moment.GeometricMean;
32 import org.hipparchus.stat.descriptive.moment.Kurtosis;
33 import org.hipparchus.stat.descriptive.moment.Mean;
34 import org.hipparchus.stat.descriptive.moment.Skewness;
35 import org.hipparchus.stat.descriptive.moment.Variance;
36 import org.hipparchus.stat.descriptive.rank.Max;
37 import org.hipparchus.stat.descriptive.rank.Min;
38 import org.hipparchus.stat.descriptive.rank.Percentile;
39 import org.hipparchus.stat.descriptive.summary.Sum;
40 import org.hipparchus.stat.descriptive.summary.SumOfSquares;
41 import org.hipparchus.util.FastMath;
42 import org.hipparchus.util.MathUtils;
43 import org.hipparchus.util.ResizableDoubleArray;
44
45
46 /**
47 * Maintains a dataset of values of a single variable and computes descriptive
48 * statistics based on stored data.
49 * <p>
50 * The {@link #getWindowSize() windowSize} property sets a limit on the number
51 * of values that can be stored in the dataset. The default value, INFINITE_WINDOW,
52 * puts no limit on the size of the dataset. This value should be used with
53 * caution, as the backing store will grow without bound in this case.
54 * <p>
55 * For very large datasets, {@link StreamingStatistics}, which does not store
56 * the dataset, should be used instead of this class. If <code>windowSize</code>
57 * is not INFINITE_WINDOW and more values are added than can be stored in the
58 * dataset, new values are added in a "rolling" manner, with new values replacing
59 * the "oldest" values in the dataset.
60 * <p>
61 * Note: this class is not threadsafe.
62 */
63 public class DescriptiveStatistics
64 implements StatisticalSummary, DoubleConsumer, Serializable {
65
66 /**
67 * Represents an infinite window size. When the {@link #getWindowSize()}
68 * returns this value, there is no limit to the number of data values
69 * that can be stored in the dataset.
70 */
71 protected static final int INFINITE_WINDOW = -1;
72
73 /** Serialization UID */
74 private static final long serialVersionUID = 20160411L;
75
76 /** The statistic used to calculate the population variance - fixed. */
77 private static final UnivariateStatistic POPULATION_VARIANCE = new Variance(false);
78
79 /** Maximum statistic implementation. */
80 private final UnivariateStatistic maxImpl;
81 /** Minimum statistic implementation. */
82 private final UnivariateStatistic minImpl;
83 /** Sum statistic implementation. */
84 private final UnivariateStatistic sumImpl;
85 /** Sum of squares statistic implementation. */
86 private final UnivariateStatistic sumOfSquaresImpl;
87 /** Mean statistic implementation. */
88 private final UnivariateStatistic meanImpl;
89 /** Variance statistic implementation. */
90 private final UnivariateStatistic varianceImpl;
91 /** Geometric mean statistic implementation. */
92 private final UnivariateStatistic geometricMeanImpl;
93 /** Kurtosis statistic implementation. */
94 private final UnivariateStatistic kurtosisImpl;
95 /** Skewness statistic implementation. */
96 private final UnivariateStatistic skewnessImpl;
97 /** Percentile statistic implementation. */
98 private final Percentile percentileImpl;
99
100 /** holds the window size. */
101 private int windowSize;
102
103 /** Stored data values. */
104 private final ResizableDoubleArray eDA;
105
106 /**
107 * Construct a DescriptiveStatistics instance with an infinite window.
108 */
109 public DescriptiveStatistics() {
110 this(INFINITE_WINDOW);
111 }
112
113 /**
114 * Construct a DescriptiveStatistics instance with the specified window.
115 *
116 * @param size the window size.
117 * @throws MathIllegalArgumentException if window size is less than 1 but
118 * not equal to {@link #INFINITE_WINDOW}
119 */
120 public DescriptiveStatistics(int size) throws MathIllegalArgumentException {
121 this(size, false, null);
122 }
123
124 /**
125 * Construct a DescriptiveStatistics instance with an infinite window
126 * and the initial data values in double[] initialDoubleArray.
127 *
128 * @param initialDoubleArray the initial double[].
129 * @throws org.hipparchus.exception.NullArgumentException if the input array is null
130 */
131 public DescriptiveStatistics(double[] initialDoubleArray) {
132 this(INFINITE_WINDOW, true, initialDoubleArray);
133 }
134
135 /**
136 * Copy constructor.
137 * <p>
138 * Construct a new DescriptiveStatistics instance that
139 * is a copy of original.
140 *
141 * @param original DescriptiveStatistics instance to copy
142 * @throws org.hipparchus.exception.NullArgumentException if original is null
143 */
144 protected DescriptiveStatistics(DescriptiveStatistics original) {
145 MathUtils.checkNotNull(original);
146
147 // Copy data and window size
148 this.windowSize = original.windowSize;
149 this.eDA = original.eDA.copy();
150
151 // Copy implementations
152 this.maxImpl = original.maxImpl.copy();
153 this.minImpl = original.minImpl.copy();
154 this.meanImpl = original.meanImpl.copy();
155 this.sumImpl = original.sumImpl.copy();
156 this.sumOfSquaresImpl = original.sumOfSquaresImpl.copy();
157 this.varianceImpl = original.varianceImpl.copy();
158 this.geometricMeanImpl = original.geometricMeanImpl.copy();
159 this.kurtosisImpl = original.kurtosisImpl.copy();
160 this.skewnessImpl = original.skewnessImpl.copy();
161 this.percentileImpl = original.percentileImpl.copy();
162 }
163
164 /**
165 * Construct a DescriptiveStatistics instance with the specified window.
166 *
167 * @param windowSize the window size
168 * @param hasInitialValues if initial values have been provided
169 * @param initialValues the initial values
170 * @throws org.hipparchus.exception.NullArgumentException if initialValues is null
171 * @throws MathIllegalArgumentException if window size is less than 1 but
172 * not equal to {@link #INFINITE_WINDOW}
173 */
174 DescriptiveStatistics(int windowSize, boolean hasInitialValues, double[] initialValues) {
175 if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
176 throw new MathIllegalArgumentException(
177 LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
178 }
179
180 if (hasInitialValues) {
181 MathUtils.checkNotNull(initialValues, LocalizedCoreFormats.INPUT_ARRAY);
182 }
183
184 this.windowSize = windowSize;
185 int initialCapacity = this.windowSize < 0 ? 100 : this.windowSize;
186 this.eDA = hasInitialValues ?
187 new ResizableDoubleArray(initialValues) :
188 new ResizableDoubleArray(initialCapacity);
189
190 maxImpl = new Max();
191 minImpl = new Min();
192 sumImpl = new Sum();
193 sumOfSquaresImpl = new SumOfSquares();
194 meanImpl = new Mean();
195 varianceImpl = new Variance();
196 geometricMeanImpl = new GeometricMean();
197 kurtosisImpl = new Kurtosis();
198 skewnessImpl = new Skewness();
199 percentileImpl = new Percentile();
200 }
201
202 /**
203 * Returns a copy of this DescriptiveStatistics instance with the same internal state.
204 *
205 * @return a copy of this
206 */
207 public DescriptiveStatistics copy() {
208 return new DescriptiveStatistics(this);
209 }
210
211 /**
212 * Adds the value to the dataset. If the dataset is at the maximum size
213 * (i.e., the number of stored elements equals the currently configured
214 * windowSize), the first (oldest) element in the dataset is discarded
215 * to make room for the new value.
216 *
217 * @param v the value to be added
218 */
219 public void addValue(double v) {
220 if (windowSize != INFINITE_WINDOW) {
221 if (getN() == windowSize) {
222 eDA.addElementRolling(v);
223 } else if (getN() < windowSize) {
224 eDA.addElement(v);
225 }
226 } else {
227 eDA.addElement(v);
228 }
229 }
230
231 /** {@inheritDoc} */
232 @Override
233 public void accept(double v) {
234 addValue(v);
235 }
236
237 /**
238 * Resets all statistics and storage.
239 */
240 public void clear() {
241 eDA.clear();
242 }
243
244 /**
245 * Removes the most recent value from the dataset.
246 *
247 * @throws MathIllegalStateException if there are no elements stored
248 */
249 public void removeMostRecentValue() throws MathIllegalStateException {
250 try {
251 eDA.discardMostRecentElements(1);
252 } catch (MathIllegalArgumentException ex) {
253 throw new MathIllegalStateException(ex, LocalizedCoreFormats.NO_DATA);
254 }
255 }
256
257 /**
258 * Replaces the most recently stored value with the given value.
259 * There must be at least one element stored to call this method.
260 *
261 * @param v the value to replace the most recent stored value
262 * @return replaced value
263 * @throws MathIllegalStateException if there are no elements stored
264 */
265 public double replaceMostRecentValue(double v) throws MathIllegalStateException {
266 return eDA.substituteMostRecentElement(v);
267 }
268
269 /**
270 * Apply the given statistic to the data associated with this set of statistics.
271 * @param stat the statistic to apply
272 * @return the computed value of the statistic.
273 */
274 public double apply(UnivariateStatistic stat) {
275 // No try-catch or advertised exception here because arguments
276 // are guaranteed valid.
277 return eDA.compute(stat);
278 }
279
280 /** {@inheritDoc} */
281 @Override
282 public double getMean() {
283 return apply(meanImpl);
284 }
285
286 /**
287 * Returns the geometric mean of the available values.
288 * <p>
289 * See {@link GeometricMean} for details on the computing algorithm.
290 *
291 * @see <a href="http://www.xycoon.com/geometric_mean.htm">
292 * Geometric mean</a>
293 *
294 * @return The geometricMean, Double.NaN if no values have been added,
295 * or if any negative values have been added.
296 */
297 public double getGeometricMean() {
298 return apply(geometricMeanImpl);
299 }
300
301 /**
302 * Returns the standard deviation of the available values.
303 * @return The standard deviation, Double.NaN if no values have been added
304 * or 0.0 for a single value set.
305 */
306 @Override
307 public double getStandardDeviation() {
308 double stdDev = Double.NaN;
309 if (getN() > 0) {
310 if (getN() > 1) {
311 stdDev = FastMath.sqrt(getVariance());
312 } else {
313 stdDev = 0.0;
314 }
315 }
316 return stdDev;
317 }
318
319 /**
320 * Returns the quadratic mean of the available values.
321 *
322 * @see <a href="http://mathworld.wolfram.com/Root-Mean-Square.html">
323 * Root Mean Square</a>
324 *
325 * @return The quadratic mean or {@code Double.NaN} if no values
326 * have been added.
327 */
328 public double getQuadraticMean() {
329 final long n = getN();
330 return n > 0 ? FastMath.sqrt(getSumOfSquares() / n) : Double.NaN;
331 }
332
333 /** {@inheritDoc} */
334 @Override
335 public double getVariance() {
336 return apply(varianceImpl);
337 }
338
339 /**
340 * Returns the population variance of the available values.
341 *
342 * @see <a href="http://en.wikibooks.org/wiki/Statistics/Summary/Variance">
343 * Population variance</a>
344 *
345 * @return The population variance, Double.NaN if no values have been added,
346 * or 0.0 for a single value set.
347 */
348 public double getPopulationVariance() {
349 return apply(POPULATION_VARIANCE);
350 }
351
352 /**
353 * Returns the skewness of the available values. Skewness is a
354 * measure of the asymmetry of a given distribution.
355 *
356 * @return The skewness, Double.NaN if less than 3 values have been added.
357 */
358 public double getSkewness() {
359 return apply(skewnessImpl);
360 }
361
362 /**
363 * Returns the Kurtosis of the available values. Kurtosis is a
364 * measure of the "peakedness" of a distribution.
365 *
366 * @return The kurtosis, Double.NaN if less than 4 values have been added.
367 */
368 public double getKurtosis() {
369 return apply(kurtosisImpl);
370 }
371
372 /** {@inheritDoc} */
373 @Override
374 public double getMax() {
375 return apply(maxImpl);
376 }
377
378 /** {@inheritDoc} */
379 @Override
380 public double getMin() {
381 return apply(minImpl);
382 }
383
384 /** {@inheritDoc} */
385 @Override
386 public double getSum() {
387 return apply(sumImpl);
388 }
389
390 /**
391 * Returns the sum of the squares of the available values.
392 * @return The sum of the squares or Double.NaN if no
393 * values have been added.
394 */
395 public double getSumOfSquares() {
396 return apply(sumOfSquaresImpl);
397 }
398
399 /**
400 * Returns an estimate for the pth percentile of the stored values.
401 * <p>
402 * The implementation provided here follows the first estimation procedure presented
403 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
404 * </p><p>
405 * <strong>Preconditions</strong>:<ul>
406 * <li><code>0 < p ≤ 100</code> (otherwise an
407 * <code>MathIllegalArgumentException</code> is thrown)</li>
408 * <li>at least one value must be stored (returns <code>Double.NaN
409 * </code> otherwise)</li>
410 * </ul>
411 *
412 * @param p the requested percentile (scaled from 0 - 100)
413 * @return An estimate for the pth percentile of the stored data
414 * @throws MathIllegalArgumentException if p is not a valid quantile
415 */
416 public double getPercentile(final double p)
417 throws MathIllegalArgumentException {
418
419 percentileImpl.setQuantile(p);
420 return apply(percentileImpl);
421 }
422
423 /** {@inheritDoc} */
424 @Override
425 public long getN() {
426 return eDA.getNumElements();
427 }
428
429 /**
430 * Returns the maximum number of values that can be stored in the
431 * dataset, or INFINITE_WINDOW (-1) if there is no limit.
432 *
433 * @return The current window size or -1 if its Infinite.
434 */
435 public int getWindowSize() {
436 return windowSize;
437 }
438
439 /**
440 * WindowSize controls the number of values that contribute to the
441 * reported statistics. For example, if windowSize is set to 3 and the
442 * values {1,2,3,4,5} have been added <strong> in that order</strong> then
443 * the <i>available values</i> are {3,4,5} and all reported statistics will
444 * be based on these values. If {@code windowSize} is decreased as a result
445 * of this call and there are more than the new value of elements in the
446 * current dataset, values from the front of the array are discarded to
447 * reduce the dataset to {@code windowSize} elements.
448 *
449 * @param windowSize sets the size of the window.
450 * @throws MathIllegalArgumentException if window size is less than 1 but
451 * not equal to {@link #INFINITE_WINDOW}
452 */
453 public void setWindowSize(int windowSize)
454 throws MathIllegalArgumentException {
455
456 if (windowSize < 1 && windowSize != INFINITE_WINDOW) {
457 throw new MathIllegalArgumentException(
458 LocalizedCoreFormats.NOT_POSITIVE_WINDOW_SIZE, windowSize);
459 }
460
461 this.windowSize = windowSize;
462
463 // We need to check to see if we need to discard elements
464 // from the front of the array. If the windowSize is less than
465 // the current number of elements.
466 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
467 eDA.discardFrontElements(eDA.getNumElements() - windowSize);
468 }
469 }
470
471 /**
472 * Returns the current set of values in an array of double primitives.
473 * The order of addition is preserved. The returned array is a fresh
474 * copy of the underlying data -- i.e., it is not a reference to the
475 * stored data.
476 *
477 * @return the current set of numbers in the order in which they
478 * were added to this set
479 */
480 public double[] getValues() {
481 return eDA.getElements();
482 }
483
484 /**
485 * Returns the current set of values in an array of double primitives,
486 * sorted in ascending order. The returned array is a fresh
487 * copy of the underlying data -- i.e., it is not a reference to the
488 * stored data.
489 * @return returns the current set of
490 * numbers sorted in ascending order
491 */
492 public double[] getSortedValues() {
493 double[] sort = getValues();
494 Arrays.sort(sort);
495 return sort;
496 }
497
498 /**
499 * Returns the element at the specified index
500 * @param index The Index of the element
501 * @return return the element at the specified index
502 */
503 public double getElement(int index) {
504 return eDA.getElement(index);
505 }
506
507 /**
508 * Generates a text report displaying univariate statistics from values
509 * that have been added. Each statistic is displayed on a separate line.
510 *
511 * @return String with line feeds displaying statistics
512 */
513 @Override
514 public String toString() {
515 final StringBuilder outBuffer = new StringBuilder(100);
516 final String endl = "\n";
517 outBuffer.append("DescriptiveStatistics:").append(endl).
518 append("n: ").append(getN()).append(endl).
519 append("min: ").append(getMin()).append(endl).
520 append("max: ").append(getMax()).append(endl).
521 append("mean: ").append(getMean()).append(endl).
522 append("std dev: ").append(getStandardDeviation()).append(endl);
523 try {
524 // No catch for MIAE because actual parameter is valid below
525 outBuffer.append("median: ").append(getPercentile(50)).append(endl);
526 } catch (MathIllegalStateException ex) {
527 outBuffer.append("median: unavailable").append(endl);
528 }
529 outBuffer.append("skewness: ").append(getSkewness()).append(endl).
530 append("kurtosis: ").append(getKurtosis()).append(endl);
531 return outBuffer.toString();
532 }
533
534 }