View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.stat.ranking;
24  
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Iterator;
28  import java.util.List;
29  
30  import org.hipparchus.exception.LocalizedCoreFormats;
31  import org.hipparchus.exception.MathIllegalArgumentException;
32  import org.hipparchus.exception.MathRuntimeException;
33  import org.hipparchus.random.RandomDataGenerator;
34  import org.hipparchus.random.RandomGenerator;
35  import org.hipparchus.util.FastMath;
36  
37  
38  /**
39   * <p> Ranking based on the natural ordering on doubles.</p>
40   * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
41   * are handled using the selected {@link TiesStrategy}.
42   * Configuration settings are supplied in optional constructor arguments.
43   * Defaults are {@link NaNStrategy#FAILED} and {@link TiesStrategy#AVERAGE},
44   * respectively. When using {@link TiesStrategy#RANDOM}, a
45   * {@link RandomGenerator} may be supplied as a constructor argument.</p>
46   * <table border="1">
47   * <caption>Examples</caption>
48   * <tr><th colspan="3">
49   * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
50   * </th></tr>
51   * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
52   * <th><code>rank(data)</code></th>
53   * <tr>
54   * <td>default (NaNs maximal)</td>
55   * <td>default (ties averaged)</td>
56   * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
57   * <tr>
58   * <td>default (NaNs maximal)</td>
59   * <td>MINIMUM</td>
60   * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
61   * <tr>
62   * <td>MINIMAL</td>
63   * <td>default (ties averaged)</td>
64   * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
65   * <tr>
66   * <td>REMOVED</td>
67   * <td>SEQUENTIAL</td>
68   * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
69   * <tr>
70   * <td>MINIMAL</td>
71   * <td>MAXIMUM</td>
72   * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table>
73   *
74   */
75  public class NaturalRanking implements RankingAlgorithm {
76  
77      /** default NaN strategy */
78      public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.FAILED;
79  
80      /** default ties strategy */
81      public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
82  
83      /** NaN strategy - defaults to NaNs maximal */
84      private final NaNStrategy nanStrategy;
85  
86      /** Ties strategy - defaults to ties averaged */
87      private final TiesStrategy tiesStrategy;
88  
89      /** Source of random data - used only when ties strategy is RANDOM */
90      private final RandomDataGenerator randomData;
91  
92      /**
93       * Create a NaturalRanking with default strategies for handling ties and NaNs.
94       */
95      public NaturalRanking() {
96          super();
97          tiesStrategy = DEFAULT_TIES_STRATEGY;
98          nanStrategy = DEFAULT_NAN_STRATEGY;
99          randomData = null;
100     }
101 
102     /**
103      * Create a NaturalRanking with the given TiesStrategy.
104      *
105      * @param tiesStrategy the TiesStrategy to use
106      */
107     public NaturalRanking(TiesStrategy tiesStrategy) {
108         super();
109         this.tiesStrategy = tiesStrategy;
110         nanStrategy = DEFAULT_NAN_STRATEGY;
111         randomData = new RandomDataGenerator();
112     }
113 
114     /**
115      * Create a NaturalRanking with the given NaNStrategy.
116      *
117      * @param nanStrategy the NaNStrategy to use
118      */
119     public NaturalRanking(NaNStrategy nanStrategy) {
120         super();
121         this.nanStrategy = nanStrategy;
122         tiesStrategy = DEFAULT_TIES_STRATEGY;
123         randomData = null;
124     }
125 
126     /**
127      * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
128      *
129      * @param nanStrategy NaNStrategy to use
130      * @param tiesStrategy TiesStrategy to use
131      */
132     public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
133         super();
134         this.nanStrategy = nanStrategy;
135         this.tiesStrategy = tiesStrategy;
136         randomData = new RandomDataGenerator();
137     }
138 
139     /**
140      * Create a NaturalRanking with TiesStrategy.RANDOM and the given
141      * RandomGenerator as the source of random data.
142      *
143      * @param randomGenerator source of random data
144      */
145     public NaturalRanking(RandomGenerator randomGenerator) {
146         super();
147         this.tiesStrategy = TiesStrategy.RANDOM;
148         nanStrategy = DEFAULT_NAN_STRATEGY;
149         randomData = RandomDataGenerator.of(randomGenerator);
150     }
151 
152 
153     /**
154      * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
155      * and the given source of random data.
156      *
157      * @param nanStrategy NaNStrategy to use
158      * @param randomGenerator source of random data
159      */
160     public NaturalRanking(NaNStrategy nanStrategy,
161             RandomGenerator randomGenerator) {
162         super();
163         this.nanStrategy = nanStrategy;
164         this.tiesStrategy = TiesStrategy.RANDOM;
165         randomData = RandomDataGenerator.of(randomGenerator);
166     }
167 
168     /**
169      * Return the NaNStrategy
170      *
171      * @return returns the NaNStrategy
172      */
173     public NaNStrategy getNanStrategy() {
174         return nanStrategy;
175     }
176 
177     /**
178      * Return the TiesStrategy
179      *
180      * @return the TiesStrategy
181      */
182     public TiesStrategy getTiesStrategy() {
183         return tiesStrategy;
184     }
185 
186     /**
187      * Rank <code>data</code> using the natural ordering on Doubles, with
188      * NaN values handled according to <code>nanStrategy</code> and ties
189      * resolved using <code>tiesStrategy.</code>
190      *
191      * @param data array to be ranked
192      * @return array of ranks
193      * @throws MathIllegalArgumentException if the selected {@link NaNStrategy} is {@code FAILED}
194      * and a {@link Double#NaN} is encountered in the input data
195      */
196     @Override
197     public double[] rank(double[] data) {
198 
199         // Array recording initial positions of data to be ranked
200         IntDoublePair[] ranks = new IntDoublePair[data.length];
201         for (int i = 0; i < data.length; i++) {
202             ranks[i] = new IntDoublePair(data[i], i);
203         }
204 
205         // Recode, remove or record positions of NaNs
206         List<Integer> nanPositions = null;
207         switch (nanStrategy) {
208             case MAXIMAL: // Replace NaNs with +INFs
209                 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
210                 break;
211             case MINIMAL: // Replace NaNs with -INFs
212                 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
213                 break;
214             case REMOVED: // Drop NaNs from data
215                 ranks = removeNaNs(ranks);
216                 break;
217             case FIXED:   // Record positions of NaNs
218                 nanPositions = getNanPositions(ranks);
219                 break;
220             case FAILED:
221                 nanPositions = getNanPositions(ranks);
222                 if (!nanPositions.isEmpty()) {
223                     throw new MathIllegalArgumentException(LocalizedCoreFormats.NAN_NOT_ALLOWED);
224                 }
225                 break;
226             default: // this should not happen unless NaNStrategy enum is changed
227                 throw MathRuntimeException.createInternalError();
228         }
229 
230         // Sort the IntDoublePairs
231         Arrays.sort(ranks, (p1, p2) -> Double.compare(p1.value, p2.value));
232 
233         // Walk the sorted array, filling output array using sorted positions,
234         // resolving ties as we go
235         double[] out = new double[ranks.length];
236         int pos = 1;  // position in sorted array
237         out[ranks[0].getPosition()] = pos;
238         List<Integer> tiesTrace = new ArrayList<>();
239         tiesTrace.add(ranks[0].getPosition());
240         for (int i = 1; i < ranks.length; i++) {
241             if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
242                 // tie sequence has ended (or had length 1)
243                 pos = i + 1;
244                 if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
245                     resolveTie(out, tiesTrace);
246                 }
247                 tiesTrace = new ArrayList<>();
248                 tiesTrace.add(ranks[i].getPosition());
249             } else {
250                 // tie sequence continues
251                 tiesTrace.add(ranks[i].getPosition());
252             }
253             out[ranks[i].getPosition()] = pos;
254         }
255         if (tiesTrace.size() > 1) {  // handle tie sequence at end
256             resolveTie(out, tiesTrace);
257         }
258         if (nanStrategy == NaNStrategy.FIXED) {
259             restoreNaNs(out, nanPositions);
260         }
261         return out;
262     }
263 
264     /**
265      * Returns an array that is a copy of the input array with IntDoublePairs
266      * having NaN values removed.
267      *
268      * @param ranks input array
269      * @return array with NaN-valued entries removed
270      */
271     private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
272         if (!containsNaNs(ranks)) {
273             return ranks;
274         }
275         IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
276         int j = 0;
277         for (int i = 0; i < ranks.length; i++) {
278             if (Double.isNaN(ranks[i].getValue())) {
279                 // drop, but adjust original ranks of later elements
280                 for (int k = i + 1; k < ranks.length; k++) {
281                     ranks[k] = new IntDoublePair(
282                             ranks[k].getValue(), ranks[k].getPosition() - 1);
283                 }
284             } else {
285                 outRanks[j] = new IntDoublePair(
286                         ranks[i].getValue(), ranks[i].getPosition());
287                 j++;
288             }
289         }
290         IntDoublePair[] returnRanks = new IntDoublePair[j];
291         System.arraycopy(outRanks, 0, returnRanks, 0, j);
292         return returnRanks;
293     }
294 
295     /**
296      * Recodes NaN values to the given value.
297      *
298      * @param ranks array to recode
299      * @param value the value to replace NaNs with
300      */
301     private void recodeNaNs(IntDoublePair[] ranks, double value) {
302         for (int i = 0; i < ranks.length; i++) {
303             if (Double.isNaN(ranks[i].getValue())) {
304                 ranks[i] = new IntDoublePair(
305                         value, ranks[i].getPosition());
306             }
307         }
308     }
309 
310     /**
311      * Checks for presence of NaNs in <code>ranks.</code>
312      *
313      * @param ranks array to be searched for NaNs
314      * @return true iff ranks contains one or more NaNs
315      */
316     private boolean containsNaNs(IntDoublePair[] ranks) {
317         for (IntDoublePair rank : ranks) {
318             if (Double.isNaN(rank.getValue())) {
319                 return true;
320             }
321         }
322         return false;
323     }
324 
325     /**
326      * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
327      * The input <code>ranks</code> array is expected to take the same value
328      * for all indices in <code>tiesTrace</code>.  The common value is recoded
329      * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
330      * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
331      * The same array and trace with tiesStrategy AVERAGE will come out
332      * <5,8,3,6,3,7,1,3>.
333      *
334      * @param ranks array of ranks
335      * @param tiesTrace list of indices where <code>ranks</code> is constant
336      * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
337      * </code>
338      */
339     private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
340 
341         // constant value of ranks over tiesTrace
342         final double c = ranks[tiesTrace.get(0)];
343 
344         // length of sequence of tied ranks
345         final int length = tiesTrace.size();
346 
347         switch (tiesStrategy) {
348             case  AVERAGE:  // Replace ranks with average
349                 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
350                 break;
351             case MAXIMUM:   // Replace ranks with maximum values
352                 fill(ranks, tiesTrace, c + length - 1);
353                 break;
354             case MINIMUM:   // Replace ties with minimum
355                 fill(ranks, tiesTrace, c);
356                 break;
357             case RANDOM:    // Fill with random integral values in [c, c + length - 1]
358                 Iterator<Integer> iterator = tiesTrace.iterator();
359                 long f = FastMath.round(c);
360                 while (iterator.hasNext()) {
361                     // No advertised exception because args are guaranteed valid
362                     ranks[iterator.next()] =
363                         randomData.nextLong(f, f + length - 1);
364                 }
365                 break;
366             case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
367                 // walk and fill
368                 iterator = tiesTrace.iterator();
369                 f = FastMath.round(c);
370                 int i = 0;
371                 while (iterator.hasNext()) {
372                     ranks[iterator.next()] = f + i++;
373                 }
374                 break;
375             default: // this should not happen unless TiesStrategy enum is changed
376                 throw MathRuntimeException.createInternalError();
377         }
378     }
379 
380     /**
381      * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
382      *
383      * @param data array to modify
384      * @param tiesTrace list of index values to set
385      * @param value value to set
386      */
387     private void fill(double[] data, List<Integer> tiesTrace, double value) {
388         for (Integer integer : tiesTrace) {
389             data[integer] = value;
390         }
391     }
392 
393     /**
394      * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
395      *
396      * @param ranks array to modify
397      * @param nanPositions list of index values to set to <code>Double.NaN</code>
398      */
399     private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
400         if (nanPositions.isEmpty()) {
401             return;
402         }
403         for (Integer nanPosition : nanPositions) {
404             ranks[nanPosition] = Double.NaN;
405         }
406 
407     }
408 
409     /**
410      * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
411      *
412      * @param ranks array to search for <code>NaNs</code>
413      * @return list of indexes i such that <code>ranks[i] = NaN</code>
414      */
415     private List<Integer> getNanPositions(IntDoublePair[] ranks) {
416         ArrayList<Integer> out = new ArrayList<>();
417         for (int i = 0; i < ranks.length; i++) {
418             if (Double.isNaN(ranks[i].getValue())) {
419                 out.add(i);
420             }
421         }
422         return out;
423     }
424 
425     /**
426      * Represents the position of a double value in an ordering.
427      */
428     private static class IntDoublePair {
429 
430         /** Value of the pair */
431         private final double value;
432 
433         /** Original position of the pair */
434         private final int position;
435 
436         /**
437          * Construct an IntDoublePair with the given value and position.
438          * @param value the value of the pair
439          * @param position the original position
440          */
441         IntDoublePair(double value, int position) {
442             this.value = value;
443             this.position = position;
444         }
445 
446         /**
447          * Returns the value of the pair.
448          * @return value
449          */
450         public double getValue() {
451             return value;
452         }
453 
454         /**
455          * Returns the original position of the pair.
456          * @return position
457          */
458         public int getPosition() {
459             return position;
460         }
461     }
462 }