View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.descriptive.rank;
23  
24  import java.io.Serializable;
25  import java.util.Arrays;
26  import java.util.BitSet;
27  
28  import org.hipparchus.exception.LocalizedCoreFormats;
29  import org.hipparchus.exception.MathIllegalArgumentException;
30  import org.hipparchus.exception.NullArgumentException;
31  import org.hipparchus.stat.LocalizedStatFormats;
32  import org.hipparchus.stat.descriptive.AbstractUnivariateStatistic;
33  import org.hipparchus.stat.ranking.NaNStrategy;
34  import org.hipparchus.util.FastMath;
35  import org.hipparchus.util.KthSelector;
36  import org.hipparchus.util.MathArrays;
37  import org.hipparchus.util.MathUtils;
38  import org.hipparchus.util.PivotingStrategy;
39  import org.hipparchus.util.Precision;
40  
41  /**
42   * Provides percentile computation.
43   * <p>
44   * There are several commonly used methods for estimating percentiles (a.k.a.
45   * quantiles) based on sample data.  For large samples, the different methods
46   * agree closely, but when sample sizes are small, different methods will give
47   * significantly different results.  The algorithm implemented here works as follows:
48   * <ol>
49   * <li>Let <code>n</code> be the length of the (sorted) array and
50   * <code>0 &lt; p &lt;= 100</code> be the desired percentile.</li>
51   * <li>If <code> n = 1 </code> return the unique array element (regardless of
52   * the value of <code>p</code>); otherwise </li>
53   * <li>Compute the estimated percentile position
54   * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
55   * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
56   * part of <code>pos</code>).</li>
57   * <li> If <code>pos &lt; 1</code> return the smallest element in the array.</li>
58   * <li> Else if <code>pos &gt;= n</code> return the largest element in the array.</li>
59   * <li> Else let <code>lower</code> be the element in position
60   * <code>floor(pos)</code> in the array and let <code>upper</code> be the
61   * next element in the array.  Return <code>lower + d * (upper - lower)</code>
62   * </li>
63   * </ol>
64   * <p>
65   * To compute percentiles, the data must be at least partially ordered.  Input
66   * arrays are copied and recursively partitioned using an ordering definition.
67   * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
68   * by {@link java.lang.Double#compareTo(Double)}.  This ordering makes
69   * <code>Double.NaN</code> larger than any other value (including
70   * <code>Double.POSITIVE_INFINITY</code>).  Therefore, for example, the median
71   * (50th percentile) of
72   * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code>
73   * <p>
74   * Since percentile estimation usually involves interpolation between array
75   * elements, arrays containing  <code>NaN</code> or infinite values will often
76   * result in <code>NaN</code> or infinite values returned.
77   * <p>
78   * Further, to include different estimation types such as R1, R2 as mentioned in
79   * <a href="http://en.wikipedia.org/wiki/Quantile">Quantile page(wikipedia)</a>,
80   * a type specific NaN handling strategy is used to closely match with the
81   * typically observed results from popular tools like R(R1-R9), Excel(R7).
82   * <p>
83   * Percentile uses only selection instead of complete sorting and caches selection
84   * algorithm state between calls to the various {@code evaluate} methods. This
85   * greatly improves efficiency, both for a single percentile and multiple percentile
86   * computations. To maximize performance when multiple percentiles are computed
87   * based on the same data, users should set the data array once using either one
88   * of the {@link #evaluate(double[], double)} or {@link #setData(double[])} methods
89   * and thereafter {@link #evaluate(double)} with just the percentile provided.
90   * <p>
91   * <strong>Note that this implementation is not synchronized.</strong> If
92   * multiple threads access an instance of this class concurrently, and at least
93   * one of the threads invokes the <code>increment()</code> or
94   * <code>clear()</code> method, it must be synchronized externally.
95   */
96  public class Percentile extends AbstractUnivariateStatistic implements Serializable {
97  
98      /** Serializable version identifier */
99      private static final long serialVersionUID = 20150412L;
100 
101     /** Maximum number of partitioning pivots cached (each level double the number of pivots). */
102     private static final int MAX_CACHED_LEVELS = 10;
103 
104     /** Maximum number of cached pivots in the pivots cached array */
105     private static final int PIVOTS_HEAP_LENGTH = 0x1 << MAX_CACHED_LEVELS - 1;
106 
107     /** Default KthSelector used with default pivoting strategy */
108     private final KthSelector kthSelector;
109 
110     /** Any of the {@link EstimationType}s such as {@link EstimationType#LEGACY CM} can be used. */
111     private final EstimationType estimationType;
112 
113     /** NaN Handling of the input as defined by {@link NaNStrategy} */
114     private final NaNStrategy nanStrategy;
115 
116     /**
117      * Determines what percentile is computed when evaluate() is activated
118      * with no quantile argument.
119      */
120     private double quantile;
121 
122     /** Cached pivots. */
123     private int[] cachedPivots;
124 
125     /**
126      * Constructs a Percentile with the following defaults.
127      * <ul>
128      *   <li>default quantile: 50.0, can be reset with {@link #setQuantile(double)}</li>
129      *   <li>default estimation type: {@link EstimationType#LEGACY},
130      *   can be reset with {@link #withEstimationType(EstimationType)}</li>
131      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED},
132      *   can be reset with {@link #withNaNStrategy(NaNStrategy)}</li>
133      *   <li>a KthSelector that makes use of {@link PivotingStrategy#MEDIAN_OF_3},
134      *   can be reset with {@link #withKthSelector(KthSelector)}</li>
135      * </ul>
136      */
137     public Percentile() {
138         // No try-catch or advertised exception here - arg is valid
139         this(50.0);
140     }
141 
142     /**
143      * Constructs a Percentile with the specific quantile value and the following
144      * <ul>
145      *   <li>default method type: {@link EstimationType#LEGACY}</li>
146      *   <li>default NaN strategy: {@link NaNStrategy#REMOVED}</li>
147      *   <li>a Kth Selector : {@link KthSelector}</li>
148      * </ul>
149      * @param quantile the quantile
150      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
151      * than or equal to 100
152      */
153     public Percentile(final double quantile) throws MathIllegalArgumentException {
154         this(quantile, EstimationType.LEGACY, NaNStrategy.REMOVED,
155              new KthSelector(PivotingStrategy.MEDIAN_OF_3));
156     }
157 
158     /**
159      * Copy constructor, creates a new {@code Percentile} identical
160      * to the {@code original}
161      *
162      * @param original the {@code Percentile} instance to copy
163      * @throws NullArgumentException if original is null
164      */
165     public Percentile(final Percentile original) throws NullArgumentException {
166         super(original);
167         estimationType   = original.getEstimationType();
168         nanStrategy      = original.getNaNStrategy();
169         kthSelector      = original.getKthSelector();
170 
171         setData(original.getDataRef());
172         if (original.cachedPivots != null) {
173             System.arraycopy(original.cachedPivots, 0, cachedPivots, 0, original.cachedPivots.length);
174         }
175         setQuantile(original.quantile);
176     }
177 
178     /**
179      * Constructs a Percentile with the specific quantile value,
180      * {@link EstimationType}, {@link NaNStrategy} and {@link KthSelector}.
181      *
182      * @param quantile the quantile to be computed
183      * @param estimationType one of the percentile {@link EstimationType  estimation types}
184      * @param nanStrategy one of {@link NaNStrategy} to handle with NaNs
185      * @param kthSelector a {@link KthSelector} to use for pivoting during search
186      * @throws MathIllegalArgumentException if p is not within (0,100]
187      * @throws NullArgumentException if type or NaNStrategy passed is null
188      */
189     protected Percentile(final double quantile,
190                          final EstimationType estimationType,
191                          final NaNStrategy nanStrategy,
192                          final KthSelector kthSelector)
193         throws MathIllegalArgumentException {
194         setQuantile(quantile);
195         cachedPivots = null;
196         MathUtils.checkNotNull(estimationType);
197         MathUtils.checkNotNull(nanStrategy);
198         MathUtils.checkNotNull(kthSelector);
199         this.estimationType = estimationType;
200         this.nanStrategy = nanStrategy;
201         this.kthSelector = kthSelector;
202     }
203 
204     /** {@inheritDoc} */
205     @Override
206     public void setData(final double[] values) {
207         if (values == null) {
208             cachedPivots = null;
209         } else {
210             cachedPivots = new int[PIVOTS_HEAP_LENGTH];
211             Arrays.fill(cachedPivots, -1);
212         }
213         super.setData(values);
214     }
215 
216     /** {@inheritDoc} */
217     @Override
218     public void setData(final double[] values, final int begin, final int length)
219         throws MathIllegalArgumentException {
220         MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY);
221         cachedPivots = new int[PIVOTS_HEAP_LENGTH];
222         Arrays.fill(cachedPivots, -1);
223         super.setData(values, begin, length);
224     }
225 
226     /**
227      * Returns the result of evaluating the statistic over the stored data.
228      * <p>
229      * The stored array is the one which was set by previous calls to
230      * {@link #setData(double[])}
231      *
232      * @param p the percentile value to compute
233      * @return the value of the statistic applied to the stored data
234      * @throws MathIllegalArgumentException if p is not a valid quantile value
235      * (p must be greater than 0 and less than or equal to 100)
236      */
237     public double evaluate(final double p) throws MathIllegalArgumentException {
238         return evaluate(getDataRef(), p);
239     }
240 
241     /**
242      * Returns an estimate of the <code>quantile</code>th percentile of the
243      * designated values in the <code>values</code> array.
244      * <p>The quantile
245      * estimated is determined by the <code>quantile</code> property.
246      * </p>
247      * <ul>
248      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
249      * <li>Returns (for any value of <code>quantile</code>)
250      * <code>values[begin]</code> if <code>length = 1 </code></li>
251      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
252      * is null, or <code>start</code> or <code>length</code> is invalid</li>
253      * </ul>
254      * <p>
255      * See {@link Percentile} for a description of the percentile estimation
256      * algorithm used.
257      *
258      * @param values the input array
259      * @param start index of the first array element to include
260      * @param length the number of elements to include
261      * @return the percentile value
262      * @throws MathIllegalArgumentException if the parameters are not valid
263      *
264      */
265     @Override
266     public double evaluate(final double[] values, final int start, final int length)
267         throws MathIllegalArgumentException {
268         return evaluate(values, start, length, quantile);
269     }
270 
271     /**
272      * Returns an estimate of the <code>p</code>th percentile of the values
273      * in the <code>values</code> array.
274      * <ul>
275      * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
276      * <code>0</code></li>
277      * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
278      *  if <code>values</code> has length <code>1</code></li>
279      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
280      * is null or p is not a valid quantile value (p must be greater than 0
281      * and less than or equal to 100) </li>
282      * </ul>
283      * <p>
284      * The default implementation delegates to
285      * <code>evaluate(double[], int, int, double)</code> in the natural way.
286      *
287      * @param values input array of values
288      * @param p the percentile value to compute
289      * @return the percentile value or Double.NaN if the array is empty
290      * @throws MathIllegalArgumentException if <code>values</code> is null or p is invalid
291      */
292     public double evaluate(final double[] values, final double p)
293         throws MathIllegalArgumentException {
294         MathUtils.checkNotNull(values, LocalizedCoreFormats.INPUT_ARRAY);
295         return evaluate(values, 0, values.length, p);
296     }
297 
298     /**
299      * Returns an estimate of the <code>p</code>th percentile of the values
300      * in the <code>values</code> array, starting with the element in (0-based)
301      * position <code>begin</code> in the array and including <code>length</code>
302      * values.
303      * <p>
304      * Calls to this method do not modify the internal <code>quantile</code>
305      * state of this statistic.
306      * </p>
307      * <ul>
308      * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
309      * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
310      *  if <code>length = 1 </code></li>
311      * <li>Throws <code>MathIllegalArgumentException</code> if <code>values</code>
312      *  is null , <code>begin</code> or <code>length</code> is invalid, or
313      * <code>p</code> is not a valid quantile value (p must be greater than 0
314      * and less than or equal to 100)</li>
315      * </ul>
316      * <p>
317      * See {@link Percentile} for a description of the percentile estimation
318      * algorithm used.
319      *
320      * @param values array of input values
321      * @param p  the percentile to compute
322      * @param begin  the first (0-based) element to include in the computation
323      * @param length  the number of array elements to include
324      * @return  the percentile value
325      * @throws MathIllegalArgumentException if the parameters are not valid or the
326      * input array is null
327      */
328     public double evaluate(final double[] values, final int begin,
329                            final int length, final double p)
330         throws MathIllegalArgumentException {
331 
332         MathArrays.verifyValues(values, begin, length);
333         if (p > 100 || p <= 0) {
334             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
335                                                    p, 0, 100);
336         }
337         if (length == 0) {
338             return Double.NaN;
339         }
340         if (length == 1) {
341             return values[begin]; // always return single value for n = 1
342         }
343 
344         final double[] work = getWorkArray(values, begin, length);
345         final int[] pivotsHeap = getPivots(values);
346         return work.length == 0 ? Double.NaN :
347                     estimationType.evaluate(work, pivotsHeap, p, kthSelector);
348     }
349 
350     /**
351      * Returns the value of the quantile field (determines what percentile is
352      * computed when evaluate() is called with no quantile argument).
353      *
354      * @return quantile set while construction or {@link #setQuantile(double)}
355      */
356     public double getQuantile() {
357         return quantile;
358     }
359 
360     /**
361      * Sets the value of the quantile field (determines what percentile is
362      * computed when evaluate() is called with no quantile argument).
363      *
364      * @param p a value between 0 &lt; p &lt;= 100
365      * @throws MathIllegalArgumentException  if p is not greater than 0 and less
366      * than or equal to 100
367      */
368     public void setQuantile(final double p) throws MathIllegalArgumentException {
369         if (p <= 0 || p > 100) {
370             throw new MathIllegalArgumentException(
371                     LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE, p, 0, 100);
372         }
373         quantile = p;
374     }
375 
376     /** {@inheritDoc} */
377     @Override
378     public Percentile copy() {
379         return new Percentile(this);
380     }
381 
382     /**
383      * Get the work array to operate. Makes use of prior {@code storedData} if
384      * it exists or else do a check on NaNs and copy a subset of the array
385      * defined by begin and length parameters. The set {@link #nanStrategy} will
386      * be used to either retain/remove/replace any NaNs present before returning
387      * the resultant array.
388      *
389      * @param values the array of numbers
390      * @param begin index to start reading the array
391      * @param length the length of array to be read from the begin index
392      * @return work array sliced from values in the range [begin,begin+length)
393      * @throws MathIllegalArgumentException if values or indices are invalid
394      */
395     protected double[] getWorkArray(final double[] values, final int begin, final int length) {
396         final double[] work;
397         if (values == getDataRef()) {
398             work = getDataRef();
399         } else {
400             switch (nanStrategy) {
401                 case MAXIMAL:// Replace NaNs with +INFs
402                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.POSITIVE_INFINITY);
403                     break;
404                 case MINIMAL:// Replace NaNs with -INFs
405                     work = replaceAndSlice(values, begin, length, Double.NaN, Double.NEGATIVE_INFINITY);
406                     break;
407                 case REMOVED:// Drop NaNs from data
408                     work = removeAndSlice(values, begin, length, Double.NaN);
409                     break;
410                 case FAILED:// just throw exception as NaN is un-acceptable
411                     work = copyOf(values, begin, length);
412                     MathArrays.checkNotNaN(work);
413                     break;
414                 default: //FIXED
415                     work = copyOf(values, begin, length);
416                     break;
417             }
418         }
419         return work;
420     }
421 
422     /**
423      * Make a copy of the array for the slice defined by array part from
424      * [begin, begin+length)
425      * @param values the input array
426      * @param begin start index of the array to include
427      * @param length number of elements to include from begin
428      * @return copy of a slice of the original array
429      */
430     private static double[] copyOf(final double[] values, final int begin, final int length) {
431         MathArrays.verifyValues(values, begin, length);
432         return Arrays.copyOfRange(values, begin, begin + length);
433     }
434 
435     /**
436      * Replace every occurrence of a given value with a replacement value in a
437      * copied slice of array defined by array part from [begin, begin+length).
438      * @param values the input array
439      * @param begin start index of the array to include
440      * @param length number of elements to include from begin
441      * @param original the value to be replaced with
442      * @param replacement the value to be used for replacement
443      * @return the copy of sliced array with replaced values
444      */
445     private static double[] replaceAndSlice(final double[] values,
446                                             final int begin, final int length,
447                                             final double original,
448                                             final double replacement) {
449         final double[] temp = copyOf(values, begin, length);
450         for(int i = 0; i < length; i++) {
451             temp[i] = Precision.equalsIncludingNaN(original, temp[i]) ?
452                       replacement : temp[i];
453         }
454         return temp;
455     }
456 
457     /**
458      * Remove the occurrence of a given value in a copied slice of array
459      * defined by the array part from [begin, begin+length).
460      * @param values the input array
461      * @param begin start index of the array to include
462      * @param length number of elements to include from begin
463      * @param removedValue the value to be removed from the sliced array
464      * @return the copy of the sliced array after removing the removedValue
465      */
466     private static double[] removeAndSlice(final double[] values,
467                                            final int begin, final int length,
468                                            final double removedValue) {
469         MathArrays.verifyValues(values, begin, length);
470         final double[] temp;
471         //BitSet(length) to indicate where the removedValue is located
472         final BitSet bits = new BitSet(length);
473         for (int i = begin; i < begin+length; i++) {
474             if (Precision.equalsIncludingNaN(removedValue, values[i])) {
475                 bits.set(i - begin);
476             }
477         }
478         //Check if empty then create a new copy
479         if (bits.isEmpty()) {
480             temp = copyOf(values, begin, length); // Nothing removed, just copy
481         } else if (bits.cardinality() == length) {
482             temp = new double[0];                 // All removed, just empty
483         } else {                                   // Some removable, so new
484             temp = new double[length - bits.cardinality()];
485             int start = begin;  //start index from source array (i.e values)
486             int dest = 0;       //dest index in destination array(i.e temp)
487             int bitSetPtr = 0;  //bitSetPtr is start index pointer of bitset
488             for (int nextOne = bits.nextSetBit(bitSetPtr); nextOne != -1; nextOne = bits.nextSetBit(bitSetPtr)) {
489                 final int lengthToCopy = nextOne - bitSetPtr;
490                 System.arraycopy(values, start, temp, dest, lengthToCopy);
491                 dest += lengthToCopy;
492                 start = begin + (bitSetPtr = bits.nextClearBit(nextOne));
493             }
494             //Copy any residue past start index till begin+length
495             if (start < begin + length) {
496                 System.arraycopy(values,start,temp,dest,begin + length - start);
497             }
498         }
499         return temp;
500     }
501 
502     /**
503      * Get pivots which is either cached or a newly created one
504      *
505      * @param values array containing the input numbers
506      * @return cached pivots or a newly created one
507      */
508     private int[] getPivots(final double[] values) {
509         final int[] pivotsHeap;
510         if (values == getDataRef()) {
511             pivotsHeap = cachedPivots;
512         } else {
513             pivotsHeap = new int[PIVOTS_HEAP_LENGTH];
514             Arrays.fill(pivotsHeap, -1);
515         }
516         return pivotsHeap;
517     }
518 
519     /**
520      * Get the estimation {@link EstimationType type} used for computation.
521      *
522      * @return the {@code estimationType} set
523      */
524     public EstimationType getEstimationType() {
525         return estimationType;
526     }
527 
528     /**
529      * Build a new instance similar to the current one except for the
530      * {@link EstimationType estimation type}.
531      * <p>
532      * This method is intended to be used as part of a fluent-type builder
533      * pattern. Building finely tune instances should be done as follows:
534      * <pre>
535      *   Percentile customized = new Percentile(quantile).
536      *                           withEstimationType(estimationType).
537      *                           withNaNStrategy(nanStrategy).
538      *                           withKthSelector(kthSelector);
539      * </pre>
540      * <p>
541      * If any of the {@code withXxx} method is omitted, the default value for
542      * the corresponding customization parameter will be used.
543      *
544      * @param newEstimationType estimation type for the new instance
545      * @return a new instance, with changed estimation type
546      * @throws NullArgumentException when newEstimationType is null
547      */
548     public Percentile withEstimationType(final EstimationType newEstimationType) {
549         return new Percentile(quantile, newEstimationType, nanStrategy, kthSelector);
550     }
551 
552     /**
553      * Get the {@link NaNStrategy NaN Handling} strategy used for computation.
554      * @return {@code NaN Handling} strategy set during construction
555      */
556     public NaNStrategy getNaNStrategy() {
557         return nanStrategy;
558     }
559 
560     /**
561      * Build a new instance similar to the current one except for the
562      * {@link NaNStrategy NaN handling} strategy.
563      * <p>
564      * This method is intended to be used as part of a fluent-type builder
565      * pattern. Building finely tune instances should be done as follows:
566      * <pre>
567      *   Percentile customized = new Percentile(quantile).
568      *                           withEstimationType(estimationType).
569      *                           withNaNStrategy(nanStrategy).
570      *                           withKthSelector(kthSelector);
571      * </pre>
572      * <p>
573      * If any of the {@code withXxx} method is omitted, the default value for
574      * the corresponding customization parameter will be used.
575      *
576      * @param newNaNStrategy NaN strategy for the new instance
577      * @return a new instance, with changed NaN handling strategy
578      * @throws NullArgumentException when newNaNStrategy is null
579      */
580     public Percentile withNaNStrategy(final NaNStrategy newNaNStrategy) {
581         return new Percentile(quantile, estimationType, newNaNStrategy, kthSelector);
582     }
583 
584     /**
585      * Get the {@link KthSelector kthSelector} used for computation.
586      * @return the {@code kthSelector} set
587      */
588     public KthSelector getKthSelector() {
589         return kthSelector;
590     }
591 
592     /**
593      * Get the {@link PivotingStrategy} used in KthSelector for computation.
594      * @return the pivoting strategy set
595      */
596     public PivotingStrategy getPivotingStrategy() {
597         return kthSelector.getPivotingStrategy();
598     }
599 
600     /**
601      * Build a new instance similar to the current one except for the
602      * {@link KthSelector kthSelector} instance specifically set.
603      * <p>
604      * This method is intended to be used as part of a fluent-type builder
605      * pattern. Building finely tune instances should be done as follows:
606      * <pre>
607      *   Percentile customized = new Percentile(quantile).
608      *                           withEstimationType(estimationType).
609      *                           withNaNStrategy(nanStrategy).
610      *                           withKthSelector(newKthSelector);
611      * </pre>
612      * <p>
613      * If any of the {@code withXxx} method is omitted, the default value for
614      * the corresponding customization parameter will be used.
615      *
616      * @param newKthSelector KthSelector for the new instance
617      * @return a new instance, with changed KthSelector
618      * @throws NullArgumentException when newKthSelector is null
619      */
620     public Percentile withKthSelector(final KthSelector newKthSelector) {
621         return new Percentile(quantile, estimationType, nanStrategy, newKthSelector);
622     }
623 
624     /**
625      * An enum for various estimation strategies of a percentile referred in
626      * <a href="http://en.wikipedia.org/wiki/Quantile">wikipedia on quantile</a>
627      * with the names of enum matching those of types mentioned in
628      * wikipedia.
629      * <p>
630      * Each enum corresponding to the specific type of estimation in wikipedia
631      * implements  the respective formulae that specializes in the below aspects
632      * <ul>
633      * <li>An <b>index method</b> to calculate approximate index of the
634      * estimate</li>
635      * <li>An <b>estimate method</b> to estimate a value found at the earlier
636      * computed index</li>
637      * <li>A <b> minLimit</b> on the quantile for which first element of sorted
638      * input is returned as an estimate </li>
639      * <li>A <b> maxLimit</b> on the quantile for which last element of sorted
640      * input is returned as an estimate </li>
641      * </ul>
642      * <p>
643      * Users can now create {@link Percentile} by explicitly passing this enum;
644      * such as by invoking {@link Percentile#withEstimationType(EstimationType)}
645      * <p>
646      * References:
647      * <ol>
648      * <li>
649      * <a href="http://en.wikipedia.org/wiki/Quantile">Wikipedia on quantile</a>
650      * </li>
651      * <li>
652      * <a href="https://www.amherst.edu/media/view/129116/.../Sample+Quantiles.pdf">
653      * Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical
654      * packages, American Statistician 50, 361–365</a> </li>
655      * <li>
656      * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">
657      * R-Manual </a></li>
658      * </ol>
659      */
660     public enum EstimationType {
661         /**
662          * This is the default type used in the {@link Percentile}.This method
663          * has the following formulae for index and estimates<br>
664          * \( \begin{align}
665          * &amp;index    = (N+1)p\ \\
666          * &amp;estimate = x_{\lceil h\,-\,1/2 \rceil} \\
667          * &amp;minLimit = 0 \\
668          * &amp;maxLimit = 1 \\
669          * \end{align}\)
670          */
671         LEGACY("Legacy Hipparchus") {
672             /**
673              * {@inheritDoc}.This method in particular makes use of existing
674              * Hipparchus style of picking up the index.
675              */
676             @Override
677             protected double index(final double p, final int length) {
678                 final double minLimit = 0d;
679                 final double maxLimit = 1d;
680                 return Double.compare(p, minLimit) == 0 ? 0 :
681                        Double.compare(p, maxLimit) == 0 ?
682                                length : p * (length + 1);
683             }
684         },
685         /**
686          * The method R_1 has the following formulae for index and estimates<br>
687          * \( \begin{align}
688          * &amp;index= Np + 1/2\,  \\
689          * &amp;estimate= x_{\lceil h\,-\,1/2 \rceil} \\
690          * &amp;minLimit = 0 \\
691          * \end{align}\)
692          */
693         R_1("R-1") {
694 
695             @Override
696             protected double index(final double p, final int length) {
697                 final double minLimit = 0d;
698                 return Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
699             }
700 
701             /**
702              * {@inheritDoc}This method in particular for R_1 uses ceil(pos-0.5)
703              */
704             @Override
705             protected double estimate(final double[] values,
706                                       final int[] pivotsHeap, final double pos,
707                                       final int length, final KthSelector selector) {
708                 return super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
709             }
710 
711         },
712         /**
713          * The method R_2 has the following formulae for index and estimates<br>
714          * \( \begin{align}
715          * &amp;index= Np + 1/2\, \\
716          * &amp;estimate=\frac{x_{\lceil h\,-\,1/2 \rceil} +
717          * x_{\lfloor h\,+\,1/2 \rfloor}}{2} \\
718          * &amp;minLimit = 0 \\
719          * &amp;maxLimit = 1 \\
720          * \end{align}\)
721          */
722         R_2("R-2") {
723 
724             @Override
725             protected double index(final double p, final int length) {
726                 final double minLimit = 0d;
727                 final double maxLimit = 1d;
728                 return Double.compare(p, maxLimit) == 0 ? length :
729                        Double.compare(p, minLimit) == 0 ? 0 : length * p + 0.5;
730             }
731 
732             /**
733              * {@inheritDoc}This method in particular for R_2 averages the
734              * values at ceil(p+0.5) and floor(p-0.5).
735              */
736             @Override
737             protected double estimate(final double[] values,
738                                       final int[] pivotsHeap, final double pos,
739                                       final int length, final KthSelector selector) {
740                 final double low =
741                         super.estimate(values, pivotsHeap, FastMath.ceil(pos - 0.5), length, selector);
742                 final double high =
743                         super.estimate(values, pivotsHeap,FastMath.floor(pos + 0.5), length, selector);
744                 return (low + high) / 2;
745             }
746 
747         },
748         /**
749          * The method R_3 has the following formulae for index and estimates<br>
750          * \( \begin{align}
751          * &amp;index= Np \\
752          * &amp;estimate= x_{\lfloor h \rceil}\, \\
753          * &amp;minLimit = 0.5/N \\
754          * \end{align}\)
755          */
756         R_3("R-3") {
757             @Override
758             protected double index(final double p, final int length) {
759                 final double minLimit = 1d/2 / length;
760                 return Double.compare(p, minLimit) <= 0 ?
761                         0 : FastMath.rint(length * p);
762             }
763 
764         },
765         /**
766          * The method R_4 has the following formulae for index and estimates<br>
767          * \( \begin{align}
768          * &amp;index= Np\, \\
769          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
770          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
771          * \rfloor}) \\
772          * &amp;minLimit = 1/N \\
773          * &amp;maxLimit = 1 \\
774          * \end{align}\)
775          */
776         R_4("R-4") {
777             @Override
778             protected double index(final double p, final int length) {
779                 final double minLimit = 1d / length;
780                 final double maxLimit = 1d;
781                 return Double.compare(p, minLimit) < 0 ? 0 :
782                        Double.compare(p, maxLimit) == 0 ? length : length * p;
783             }
784 
785         },
786         /**
787          * The method R_5 has the following formulae for index and estimates<br>
788          * \( \begin{align}
789          * &amp;index= Np + 1/2\\
790          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
791          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
792          * \rfloor}) \\
793          * &amp;minLimit = 0.5/N \\
794          * &amp;maxLimit = (N-0.5)/N
795          * \end{align}\)
796          */
797         R_5("R-5") {
798 
799             @Override
800             protected double index(final double p, final int length) {
801                 final double minLimit = 1d/2 / length;
802                 final double maxLimit = (length - 0.5) / length;
803                 return Double.compare(p, minLimit) < 0 ? 0 :
804                        Double.compare(p, maxLimit) >= 0 ?
805                                length : length * p + 0.5;
806             }
807         },
808         /**
809          * The method R_6 has the following formulae for index and estimates<br>
810          * \( \begin{align}
811          * &amp;index= (N + 1)p \\
812          * &amp;estimate= x_{\lfloor h \rfloor} + (h -
813          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
814          * \rfloor}) \\
815          * &amp;minLimit = 1/(N+1) \\
816          * &amp;maxLimit = N/(N+1) \\
817          * \end{align}\)
818          * <p>
819          * <b>Note:</b> This method computes the index in a manner very close to
820          * the default Hipparchus Percentile existing implementation. However
821          * the difference to be noted is in picking up the limits with which
822          * first element (p&lt;1(N+1)) and last elements (p&gt;N/(N+1))are done.
823          * While in default case; these are done with p=0 and p=1 respectively.
824          */
825         R_6("R-6") {
826 
827             @Override
828             protected double index(final double p, final int length) {
829                 final double minLimit = 1d / (length + 1);
830                 final double maxLimit = 1d * length / (length + 1);
831                 return Double.compare(p, minLimit) < 0 ? 0 :
832                        Double.compare(p, maxLimit) >= 0 ?
833                                length : (length + 1) * p;
834             }
835         },
836 
837         /**
838          * The method R_7 implements Microsoft Excel style computation has the
839          * following formulae for index and estimates.<br>
840          * \( \begin{align}
841          * &amp;index = (N-1)p + 1 \\
842          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
843          * \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
844          * \rfloor}) \\
845          * &amp;minLimit = 0 \\
846          * &amp;maxLimit = 1 \\
847          * \end{align}\)
848          */
849         R_7("R-7") {
850             @Override
851             protected double index(final double p, final int length) {
852                 final double minLimit = 0d;
853                 final double maxLimit = 1d;
854                 return Double.compare(p, minLimit) == 0 ? 0 :
855                        Double.compare(p, maxLimit) == 0 ?
856                                length : 1 + (length - 1) * p;
857             }
858 
859         },
860 
861         /**
862          * The method R_8 has the following formulae for index and estimates<br>
863          * \( \begin{align}
864          * &amp;index = (N + 1/3)p + 1/3  \\
865          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
866            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
867          * \rfloor}) \\
868          * &amp;minLimit = (2/3)/(N+1/3) \\
869          * &amp;maxLimit = (N-1/3)/(N+1/3) \\
870          * \end{align}\)
871          * <p>
872          * As per Ref [2,3] this approach is most recommended as it provides
873          * an approximate median-unbiased estimate regardless of distribution.
874          */
875         R_8("R-8") {
876             @Override
877             protected double index(final double p, final int length) {
878                 final double minLimit = 2 * (1d / 3) / (length + 1d / 3);
879                 final double maxLimit =
880                         (length - 1d / 3) / (length + 1d / 3);
881                 return Double.compare(p, minLimit) < 0 ? 0 :
882                        Double.compare(p, maxLimit) >= 0 ? length :
883                            (length + 1d / 3) * p + 1d / 3;
884             }
885         },
886 
887         /**
888          * The method R_9 has the following formulae for index and estimates<br>
889          * \( \begin{align}
890          * &amp;index = (N + 1/4)p + 3/8\\
891          * &amp;estimate = x_{\lfloor h \rfloor} + (h -
892            \lfloor h \rfloor) (x_{\lfloor h \rfloor + 1} - x_{\lfloor h
893          * \rfloor}) \\
894          * &amp;minLimit = (5/8)/(N+1/4) \\
895          * &amp;maxLimit = (N-3/8)/(N+1/4) \\
896          * \end{align}\)
897          */
898         R_9("R-9") {
899             @Override
900             protected double index(final double p, final int length) {
901                 final double minLimit = 5d/8 / (length + 0.25);
902                 final double maxLimit = (length - 3d/8) / (length + 0.25);
903                 return Double.compare(p, minLimit) < 0 ? 0 :
904                        Double.compare(p, maxLimit) >= 0 ? length :
905                                (length + 0.25) * p + 3d/8;
906             }
907 
908         },
909         ;
910 
911         /** Simple name such as R-1, R-2 corresponding to those in wikipedia. */
912         private final String name;
913 
914         /**
915          * Constructor
916          *
917          * @param type name of estimation type as per wikipedia
918          */
919         EstimationType(final String type) {
920             this.name = type;
921         }
922 
923         /**
924          * Finds the index of array that can be used as starting index to
925          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
926          * percentile. The calculation of index calculation is specific to each
927          * {@link EstimationType}.
928          *
929          * @param p the p<sup>th</sup> quantile
930          * @param length the total number of array elements in the work array
931          * @return a computed real valued index as explained in the wikipedia
932          */
933         protected abstract double index(double p, int length);
934 
935         /**
936          * Estimation based on K<sup>th</sup> selection. This may be overridden
937          * in specific enums to compute slightly different estimations.
938          *
939          * @param work array of numbers to be used for finding the percentile
940          * @param pos indicated positional index prior computed from calling
941          *            {@link #index(double, int)}
942          * @param pivotsHeap an earlier populated cache if exists; will be used
943          * @param length size of array considered
944          * @param selector a {@link KthSelector} used for pivoting during search
945          * @return estimated percentile
946          */
947         protected double estimate(final double[] work, final int[] pivotsHeap,
948                                   final double pos, final int length,
949                                   final KthSelector selector) {
950 
951             final double fpos = FastMath.floor(pos);
952             final int intPos = (int) fpos;
953             final double dif = pos - fpos;
954 
955             if (pos < 1) {
956                 return selector.select(work, pivotsHeap, 0);
957             }
958             if (pos >= length) {
959                 return selector.select(work, pivotsHeap, length - 1);
960             }
961 
962             final double lower = selector.select(work, pivotsHeap, intPos - 1);
963             final double upper = selector.select(work, pivotsHeap, intPos);
964             return lower + dif * (upper - lower);
965         }
966 
967         /**
968          * Evaluate method to compute the percentile for a given bounded array
969          * using earlier computed pivots heap.<br>
970          * This basically calls the {@link #index(double, int) index} and then
971          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
972          * functions to return the estimated percentile value.
973          *
974          * @param work array of numbers to be used for finding the percentile
975          * @param pivotsHeap a prior cached heap which can speed up estimation
976          * @param p the p<sup>th</sup> quantile to be computed
977          * @param selector a {@link KthSelector} used for pivoting during search
978          * @return estimated percentile
979          * @throws MathIllegalArgumentException if p is out of range
980          * @throws NullArgumentException if work array is null
981          */
982         protected double evaluate(final double[] work, final int[] pivotsHeap, final double p,
983                                   final KthSelector selector) {
984             MathUtils.checkNotNull(work);
985             if (p > 100 || p <= 0) {
986                 throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_QUANTILE_VALUE,
987                                               p, 0, 100);
988             }
989             return estimate(work, pivotsHeap, index(p/100d, work.length), work.length, selector);
990         }
991 
992         /**
993          * Evaluate method to compute the percentile for a given bounded array.
994          * This basically calls the {@link #index(double, int) index} and then
995          * {@link #estimate(double[], int[], double, int, KthSelector) estimate}
996          * functions to return the estimated percentile value. Please
997          * note that this method does not make use of cached pivots.
998          *
999          * @param work array of numbers to be used for finding the percentile
1000          * @param p the p<sup>th</sup> quantile to be computed
1001          * @return estimated percentile
1002          * @param selector a {@link KthSelector} used for pivoting during search
1003          * @throws MathIllegalArgumentException if length or p is out of range
1004          * @throws NullArgumentException if work array is null
1005          */
1006         public double evaluate(final double[] work, final double p, final KthSelector selector) {
1007             return this.evaluate(work, null, p, selector);
1008         }
1009 
1010         /**
1011          * Gets the name of the enum
1012          *
1013          * @return the name
1014          */
1015         String getName() {
1016             return name;
1017         }
1018     }
1019 }