View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.stat.ranking;
24  
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Iterator;
28  import java.util.List;
29  
30  import org.hipparchus.exception.LocalizedCoreFormats;
31  import org.hipparchus.exception.MathIllegalArgumentException;
32  import org.hipparchus.exception.MathRuntimeException;
33  import org.hipparchus.random.RandomDataGenerator;
34  import org.hipparchus.random.RandomGenerator;
35  import org.hipparchus.util.FastMath;
36  
37  
38  /**
39   * <p> Ranking based on the natural ordering on doubles.</p>
40   * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
41   * are handled using the selected {@link TiesStrategy}.
42   * Configuration settings are supplied in optional constructor arguments.
43   * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
44   * respectively. When using {@link TiesStrategy#RANDOM}, a
45   * {@link RandomGenerator} may be supplied as a constructor argument.</p>
46   * <table border="1">
47   * <caption>Examples</caption>
48   * <tr><th colspan="3">
49   * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
50   * </th></tr>
51   * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
52   * <th><code>rank(data)</code></th>
53   * <tr>
54   * <td>default (NaNs maximal)</td>
55   * <td>default (ties averaged)</td>
56   * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
57   * <tr>
58   * <td>default (NaNs maximal)</td>
59   * <td>MINIMUM</td>
60   * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
61   * <tr>
62   * <td>MINIMAL</td>
63   * <td>default (ties averaged)</td>
64   * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
65   * <tr>
66   * <td>REMOVED</td>
67   * <td>SEQUENTIAL</td>
68   * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
69   * <tr>
70   * <td>MINIMAL</td>
71   * <td>MAXIMUM</td>
72   * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
73   *
74   */
75  public class NaturalRanking implements RankingAlgorithm {
76  
77      /** default NaN strategy */
78      public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
79  
80      /** default ties strategy */
81      public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
82  
83      /** NaN strategy - defaults to NaNs maximal */
84      private final NaNStrategy nanStrategy;
85  
86      /** Ties strategy - defaults to ties averaged */
87      private final TiesStrategy tiesStrategy;
88  
89      /** Source of random data - used only when ties strategy is RANDOM */
90      private final RandomDataGenerator randomData;
91  
92      /**
93       * Create a NaturalRanking with default strategies for handling ties and NaNs.
94       */
95      public NaturalRanking() {
96          super();
97          tiesStrategy = DEFAULT_TIES_STRATEGY;
98          nanStrategy = DEFAULT_NAN_STRATEGY;
99          randomData = null;
100     }
101 
102     /**
103      * Create a NaturalRanking with the given TiesStrategy.
104      *
105      * @param tiesStrategy the TiesStrategy to use
106      */
107     public NaturalRanking(TiesStrategy tiesStrategy) {
108         super();
109         this.tiesStrategy = tiesStrategy;
110         nanStrategy = DEFAULT_NAN_STRATEGY;
111         randomData = new RandomDataGenerator();
112     }
113 
114     /**
115      * Create a NaturalRanking with the given NaNStrategy.
116      *
117      * @param nanStrategy the NaNStrategy to use
118      */
119     public NaturalRanking(NaNStrategy nanStrategy) {
120         super();
121         this.nanStrategy = nanStrategy;
122         tiesStrategy = DEFAULT_TIES_STRATEGY;
123         randomData = null;
124     }
125 
126     /**
127      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
128      *
129      * @param nanStrategy NaNStrategy to use
130      * @param tiesStrategy TiesStrategy to use
131      */
132     public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
133         super();
134         this.nanStrategy = nanStrategy;
135         this.tiesStrategy = tiesStrategy;
136         randomData = new RandomDataGenerator();
137     }
138 
139     /**
140      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
141      * RandomGenerator as the source of random data.
142      *
143      * @param randomGenerator source of random data
144      */
145     public NaturalRanking(RandomGenerator randomGenerator) {
146         super();
147         this.tiesStrategy = TiesStrategy.RANDOM;
148         nanStrategy = DEFAULT_NAN_STRATEGY;
149         randomData = RandomDataGenerator.of(randomGenerator);
150     }
151 
152 
153     /**
154      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
155      * and the given source of random data.
156      *
157      * @param nanStrategy NaNStrategy to use
158      * @param randomGenerator source of random data
159      */
160     public NaturalRanking(NaNStrategy nanStrategy,
161             RandomGenerator randomGenerator) {
162         super();
163         this.nanStrategy = nanStrategy;
164         this.tiesStrategy = TiesStrategy.RANDOM;
165         randomData = RandomDataGenerator.of(randomGenerator);
166     }
167 
168     /**
169      * Return the NaNStrategy
170      *
171      * @return returns the NaNStrategy
172      */
173     public NaNStrategy getNanStrategy() {
174         return nanStrategy;
175     }
176 
177     /**
178      * Return the TiesStrategy
179      *
180      * @return the TiesStrategy
181      */
182     public TiesStrategy getTiesStrategy() {
183         return tiesStrategy;
184     }
185 
186     /**
187      * Rank <code>data</code> using the natural ordering on Doubles, with
188      * NaN values handled according to <code>nanStrategy</code> and ties
189      * resolved using <code>tiesStrategy.</code>
190      *
191      * @param data array to be ranked
192      * @return array of ranks
193      * @throws MathIllegalArgumentException if the selected {@link NaNStrategy} is {@code FAILED}
194      * and a {@link Double#NaN} is encountered in the input data
195      */
196     @Override
197     public double[] rank(double[] data) {
198 
199         // Array recording initial positions of data to be ranked
200         IntDoublePair[] ranks = new IntDoublePair[data.length];
201         for (int i = 0; i < data.length; i++) {
202             ranks[i] = new IntDoublePair(data[i], i);
203         }
204 
205         // Recode, remove or record positions of NaNs
206         List<Integer> nanPositions = null;
207         switch (nanStrategy) {
208             case MAXIMAL: // Replace NaNs with +INFs
209                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
210                 break;
211             case MINIMAL: // Replace NaNs with -INFs
212                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
213                 break;
214             case REMOVED: // Drop NaNs from data
215                 ranks = removeNaNs(ranks);
216                 break;
217             case FIXED:   // Record positions of NaNs
218                 nanPositions = getNanPositions(ranks);
219                 break;
220             case FAILED:
221                 nanPositions = getNanPositions(ranks);
222                 if (!nanPositions.isEmpty()) {
223                     throw new MathIllegalArgumentException(LocalizedCoreFormats.NAN_NOT_ALLOWED);
224                 }
225                 break;
226             default: // this should not happen unless NaNStrategy enum is changed
227                 throw MathRuntimeException.createInternalError();
228         }
229 
230         // Sort the IntDoublePairs
231         Arrays.sort(ranks, (p1, p2) -> Double.compare(p1.value, p2.value));
232 
233         // Walk the sorted array, filling output array using sorted positions,
234         // resolving ties as we go
235         double[] out = new double[ranks.length];
236         int pos = 1;  // position in sorted array
237         out[ranks[0].getPosition()] = pos;
238         List<Integer> tiesTrace = new ArrayList<>();
239         tiesTrace.add(ranks[0].getPosition());
240         for (int i = 1; i < ranks.length; i++) {
241             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
242                 // tie sequence has ended (or had length 1)
243                 pos = i + 1;
244                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
245                     resolveTie(out, tiesTrace);
246                 }
247                 tiesTrace = new ArrayList<>();
248                 tiesTrace.add(ranks[i].getPosition());
249             } else {
250                 // tie sequence continues
251                 tiesTrace.add(ranks[i].getPosition());
252             }
253             out[ranks[i].getPosition()] = pos;
254         }
255         if (tiesTrace.size() > 1) {  // handle tie sequence at end
256             resolveTie(out, tiesTrace);
257         }
258         if (nanStrategy == NaNStrategy.FIXED) {
259             restoreNaNs(out, nanPositions);
260         }
261         return out;
262     }
263 
264     /**
265      * Returns an array that is a copy of the input array with IntDoublePairs
266      * having NaN values removed.
267      *
268      * @param ranks input array
269      * @return array with NaN-valued entries removed
270      */
271     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
272         if (!containsNaNs(ranks)) {
273             return ranks;
274         }
275         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
276         int j = 0;
277         for (int i = 0; i < ranks.length; i++) {
278             if (Double.isNaN(ranks[i].getValue())) {
279                 // drop, but adjust original ranks of later elements
280                 for (int k = i + 1; k < ranks.length; k++) {
281                     ranks[k] = new IntDoublePair(
282                             ranks[k].getValue(), ranks[k].getPosition() - 1);
283                 }
284             } else {
285                 outRanks[j] = new IntDoublePair(
286                         ranks[i].getValue(), ranks[i].getPosition());
287                 j++;
288             }
289         }
290         IntDoublePair[] returnRanks = new IntDoublePair[j];
291         System.arraycopy(outRanks, 0, returnRanks, 0, j);
292         return returnRanks;
293     }
294 
295     /**
296      * Recodes NaN values to the given value.
297      *
298      * @param ranks array to recode
299      * @param value the value to replace NaNs with
300      */
301     private void recodeNaNs(IntDoublePair[] ranks, double value) {
302         for (int i = 0; i < ranks.length; i++) {
303             if (Double.isNaN(ranks[i].getValue())) {
304                 ranks[i] = new IntDoublePair(
305                         value, ranks[i].getPosition());
306             }
307         }
308     }
309 
310     /**
311      * Checks for presence of NaNs in <code>ranks.</code>
312      *
313      * @param ranks array to be searched for NaNs
314      * @return true iff ranks contains one or more NaNs
315      */
316     private boolean containsNaNs(IntDoublePair[] ranks) {
317         for (int i = 0; i < ranks.length; i++) {
318             if (Double.isNaN(ranks[i].getValue())) {
319                 return true;
320             }
321         }
322         return false;
323     }
324 
325     /**
326      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
327      * The input <code>ranks</code> array is expected to take the same value
328      * for all indices in <code>tiesTrace</code>.  The common value is recoded
329      * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
330      * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
331      * The same array and trace with tiesStrategy AVERAGE will come out
332      * <5,8,3,6,3,7,1,3>.
333      *
334      * @param ranks array of ranks
335      * @param tiesTrace list of indices where <code>ranks</code> is constant
336      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
337      * </code>
338      */
339     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
340 
341         // constant value of ranks over tiesTrace
342         final double c = ranks[tiesTrace.get(0)];
343 
344         // length of sequence of tied ranks
345         final int length = tiesTrace.size();
346 
347         switch (tiesStrategy) {
348             case  AVERAGE:  // Replace ranks with average
349                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
350                 break;
351             case MAXIMUM:   // Replace ranks with maximum values
352                 fill(ranks, tiesTrace, c + length - 1);
353                 break;
354             case MINIMUM:   // Replace ties with minimum
355                 fill(ranks, tiesTrace, c);
356                 break;
357             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
358                 Iterator<Integer> iterator = tiesTrace.iterator();
359                 long f = FastMath.round(c);
360                 while (iterator.hasNext()) {
361                     // No advertised exception because args are guaranteed valid
362                     ranks[iterator.next()] =
363                         randomData.nextLong(f, f + length - 1);
364                 }
365                 break;
366             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
367                 // walk and fill
368                 iterator = tiesTrace.iterator();
369                 f = FastMath.round(c);
370                 int i = 0;
371                 while (iterator.hasNext()) {
372                     ranks[iterator.next()] = f + i++;
373                 }
374                 break;
375             default: // this should not happen unless TiesStrategy enum is changed
376                 throw MathRuntimeException.createInternalError();
377         }
378     }
379 
380     /**
381      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
382      *
383      * @param data array to modify
384      * @param tiesTrace list of index values to set
385      * @param value value to set
386      */
387     private void fill(double[] data, List<Integer> tiesTrace, double value) {
388         Iterator<Integer> iterator = tiesTrace.iterator();
389         while (iterator.hasNext()) {
390             data[iterator.next()] = value;
391         }
392     }
393 
394     /**
395      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
396      *
397      * @param ranks array to modify
398      * @param nanPositions list of index values to set to <code>Double.NaN</code>
399      */
400     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
401         if (nanPositions.isEmpty()) {
402             return;
403         }
404         Iterator<Integer> iterator = nanPositions.iterator();
405         while (iterator.hasNext()) {
406             ranks[iterator.next().intValue()] = Double.NaN;
407         }
408 
409     }
410 
411     /**
412      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
413      *
414      * @param ranks array to search for <code>NaNs</code>
415      * @return list of indexes i such that <code>ranks[i] = NaN</code>
416      */
417     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
418         ArrayList<Integer> out = new ArrayList<>();
419         for (int i = 0; i < ranks.length; i++) {
420             if (Double.isNaN(ranks[i].getValue())) {
421                 out.add(Integer.valueOf(i));
422             }
423         }
424         return out;
425     }
426 
427     /**
428      * Represents the position of a double value in an ordering.
429      */
430     private static class IntDoublePair {
431 
432         /** Value of the pair */
433         private final double value;
434 
435         /** Original position of the pair */
436         private final int position;
437 
438         /**
439          * Construct an IntDoublePair with the given value and position.
440          * @param value the value of the pair
441          * @param position the original position
442          */
443         IntDoublePair(double value, int position) {
444             this.value = value;
445             this.position = position;
446         }
447 
448         /**
449          * Returns the value of the pair.
450          * @return value
451          */
452         public double getValue() {
453             return value;
454         }
455 
456         /**
457          * Returns the original position of the pair.
458          * @return position
459          */
460         public int getPosition() {
461             return position;
462         }
463     }
464 }