View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.stat.inference;
24  
25  import org.hipparchus.UnitTestUtils;
26  import org.hipparchus.distribution.continuous.NormalDistribution;
27  import org.hipparchus.distribution.continuous.UniformRealDistribution;
28  import org.hipparchus.random.RandomGenerator;
29  import org.hipparchus.random.Well19937c;
30  import org.hipparchus.util.CombinatoricsUtils;
31  import org.hipparchus.util.FastMath;
32  import org.hipparchus.util.MathArrays;
33  import org.junit.jupiter.api.Test;
34  import org.junit.jupiter.api.Timeout;
35  
36  import java.lang.reflect.Method;
37  import java.util.Arrays;
38  import java.util.Iterator;
39  import java.util.Map;
40  import java.util.TreeMap;
41  import java.util.concurrent.TimeUnit;
42  
43  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
44  import static org.junit.jupiter.api.Assertions.assertEquals;
45  import static org.junit.jupiter.api.Assertions.assertFalse;
46  import static org.junit.jupiter.api.Assertions.assertTrue;
47  
48  /**
49   * Test cases for {@link KolmogorovSmirnovTest}.
50   *
51   */
52  public class KolmogorovSmirnovTestTest {
53  
54      protected static final double TOLERANCE = 10e-10;
55  
56      // Random N(0,1) values generated using R rnorm
57      protected static final double[] gaussian = {
58          0.26055895, -0.63665233, 1.51221323, 0.61246988, -0.03013003, -1.73025682, -0.51435805, 0.70494168, 0.18242945,
59          0.94734336, -0.04286604, -0.37931719, -1.07026403, -2.05861425, 0.11201862, 0.71400136, -0.52122185,
60          -0.02478725, -1.86811649, -1.79907688, 0.15046279, 1.32390193, 1.55889719, 1.83149171, -0.03948003,
61          -0.98579207, -0.76790540, 0.89080682, 0.19532153, 0.40692841, 0.15047336, -0.58546562, -0.39865469, 0.77604271,
62          -0.65188221, -1.80368554, 0.65273365, -0.75283102, -1.91022150, -0.07640869, -1.08681188, -0.89270600,
63          2.09017508, 0.43907981, 0.10744033, -0.70961218, 1.15707300, 0.44560525, -2.04593349, 0.53816843, -0.08366640,
64          0.24652218, 1.80549401, -0.99220707, -1.14589408, -0.27170290, -0.49696855, 0.00968353, -1.87113545,
65          -1.91116529, 0.97151891, -0.73576115, -0.59437029, 0.72148436, 0.01747695, -0.62601157, -1.00971538,
66          -1.42691397, 1.03250131, -0.30672627, -0.15353992, -1.19976069, -0.68364218, 0.37525652, -0.46592881,
67          -0.52116168, -0.17162202, 1.04679215, 0.25165971, -0.04125231, -0.23756244, -0.93389975, 0.75551407,
68          0.08347445, -0.27482228, -0.4717632, -0.1867746, -0.1166976, 0.5763333, 0.1307952, 0.7630584, -0.3616248,
69          2.1383790, -0.7946630, 0.0231885, 0.7919195, 1.6057144, -0.3802508, 0.1229078, 1.5252901, -0.8543149, 0.3025040
70      };
71  
72      // Random N(0, 1.6) values generated using R rnorm
73      protected static final double[] gaussian2 = {
74          2.88041498038308, -0.632349445671017, 0.402121295225571, 0.692626364613243, 1.30693446815426,
75          -0.714176317131286, -0.233169206599583, 1.09113298322107, -1.53149079994305, 1.23259966205809,
76          1.01389927412503, 0.0143898711497477, -0.512813545447559, 2.79364360835469, 0.662008875538092,
77          1.04861546834788, -0.321280099931466, 0.250296656278743, 1.75820367603736, -2.31433523590905,
78          -0.462694696086403, 0.187725700950191, -2.24410950019152, 2.83473751105445, 0.252460174391016,
79          1.39051945380281, -1.56270144203134, 0.998522814471644, -1.50147469080896, 0.145307533554146,
80          0.469089457043406, -0.0914780723809334, -0.123446939266548, -0.610513388160565, -3.71548343891957,
81          -0.329577317349478, -0.312973794075871, 2.02051909758923, 2.85214308266271, 0.0193222002327237,
82          -0.0322422268266562, 0.514736012106768, 0.231484953375887, -2.22468798953629, 1.42197716075595,
83          2.69988043856357, 0.0443757119128293, 0.721536984407798, -0.0445688839903234, -0.294372724550705,
84          0.234041580912698, -0.868973119365727, 1.3524893453845, -0.931054600134503, -0.263514296006792,
85          0.540949457402918, -0.882544288773685, -0.34148675747989, 1.56664494810034, 2.19850536566584,
86          -0.667972122928022, -0.70889669526203, -0.00251758193079668, 2.39527162977682, -2.7559594317269,
87          -0.547393502656671, -2.62144031572617, 2.81504147017922, -1.02036850201042, -1.00713927602786,
88          -0.520197775122254, 1.00625480138649, 2.46756916531313, 1.64364743727799, 0.704545210648595,
89          -0.425885789416992, -1.78387854908546, -0.286783886710481, 0.404183648369076, -0.369324280845769,
90          -0.0391185138840443, 2.41257787857293, 2.49744281317859, -0.826964496939021, -0.792555379958975,
91          1.81097685787403, -0.475014580016638, 1.23387615291805, 0.646615294802053, 1.88496377454523, 1.20390698380814,
92          -0.27812153371728, 2.50149494533101, 0.406964323253817, -1.72253451309982, 1.98432494184332, 2.2223658560333,
93          0.393086362404685, -0.504073151377089, -0.0484610869883821
94      };
95  
96      // Random uniform (0, 1) generated using R runif
97      protected static final double[] uniform = {
98          0.7930305, 0.6424382, 0.8747699, 0.7156518, 0.1845909, 0.2022326, 0.4877206, 0.8928752, 0.2293062, 0.4222006,
99          0.1610459, 0.2830535, 0.9946345, 0.7329499, 0.26411126, 0.87958133, 0.29827437, 0.39185988, 0.38351185,
100         0.36359611, 0.48646472, 0.05577866, 0.56152250, 0.52672013, 0.13171783, 0.95864085, 0.03060207, 0.33514887,
101         0.72508148, 0.38901437, 0.9978665, 0.5981300, 0.1065388, 0.7036991, 0.1071584, 0.4423963, 0.1107071, 0.6437221,
102         0.58523872, 0.05044634, 0.65999539, 0.37367260, 0.73270024, 0.47473755, 0.74661163, 0.50765549, 0.05377347,
103         0.40998009, 0.55235182, 0.21361998, 0.63117971, 0.18109222, 0.89153510, 0.23203248, 0.6177106, 0.6856418,
104         0.2158557, 0.9870501, 0.2036914, 0.2100311, 0.9065020, 0.7459159, 0.56631790, 0.06753629, 0.39684629,
105         0.52504615, 0.14199103, 0.78551120, 0.90503321, 0.80452362, 0.9960115, 0.8172592, 0.5831134, 0.8794187,
106         0.2021501, 0.2923505, 0.9561824, 0.8792248, 0.85201008, 0.02945562, 0.26200374, 0.11382818, 0.17238856,
107         0.36449473, 0.69688273, 0.96216330, 0.4859432, 0.4503438, 0.1917656, 0.8357845, 0.9957812, 0.4633570,
108         0.8654599, 0.4597996, 0.68190289, 0.58887855, 0.09359396, 0.98081979, 0.73659533, 0.89344777, 0.18903099,
109         0.97660425
110     };
111 
112     /** Unit normal distribution, unit normal data */
113     @Test
114     void testOneSampleGaussianGaussian() {
115         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
116         final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
117         // Uncomment to run exact test - takes about a minute. Same value is used in R tests and for
118         // approx.
119         // Assertions.assertEquals(0.3172069207622391, test.kolmogorovSmirnovTest(unitNormal, gaussian,
120         // true), TOLERANCE);
121         assertEquals(0.3172069207622391, test.kolmogorovSmirnovTest(unitNormal, gaussian, false), TOLERANCE);
122         assertFalse(test.kolmogorovSmirnovTest(unitNormal, gaussian, 0.05));
123         assertEquals(0.0932947561266756, test.kolmogorovSmirnovStatistic(unitNormal, gaussian), TOLERANCE);
124     }
125 
126     /** Unit normal distribution, unit normal data, small dataset */
127     @Test
128     void testOneSampleGaussianGaussianSmallSample() {
129         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
130         final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
131         final double[] shortGaussian = new double[50];
132         System.arraycopy(gaussian, 0, shortGaussian, 0, 50);
133         assertEquals(0.683736463728347, test.kolmogorovSmirnovTest(unitNormal, shortGaussian, false), TOLERANCE);
134         assertFalse(test.kolmogorovSmirnovTest(unitNormal, gaussian, 0.05));
135         assertEquals(0.09820779969463278, test.kolmogorovSmirnovStatistic(unitNormal, shortGaussian), TOLERANCE);
136     }
137 
138     /** Unit normal distribution, uniform data */
139     @Test
140     void testOneSampleGaussianUniform() {
141         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
142         final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
143         // Uncomment to run exact test - takes a long time. Same value is used in R tests and for
144         // approx.
145         // Assertions.assertEquals(0.3172069207622391, test.kolmogorovSmirnovTest(unitNormal, uniform,
146         // true), TOLERANCE);
147         assertEquals(8.881784197001252E-16, test.kolmogorovSmirnovTest(unitNormal, uniform, false), TOLERANCE);
148         assertFalse(test.kolmogorovSmirnovTest(unitNormal, gaussian, 0.05));
149         assertEquals(0.5117493931609258, test.kolmogorovSmirnovStatistic(unitNormal, uniform), TOLERANCE);
150     }
151 
152     /** Uniform distribution, uniform data */
153     // @Test - takes about 6 seconds, uncomment for
154     public void testOneSampleUniformUniform() {
155         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
156         final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
157         assertEquals(8.881784197001252E-16, test.kolmogorovSmirnovTest(unif, uniform, false), TOLERANCE);
158         assertTrue(test.kolmogorovSmirnovTest(unif, uniform, 0.05));
159         assertEquals(0.5400666982352942, test.kolmogorovSmirnovStatistic(unif, uniform), TOLERANCE);
160     }
161 
162     /** Uniform distribution, uniform data, small dataset */
163     @Test
164     void testOneSampleUniformUniformSmallSample() {
165         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
166         final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
167         final double[] shortUniform = new double[20];
168         System.arraycopy(uniform, 0, shortUniform, 0, 20);
169         assertEquals(4.117594598618268E-9, test.kolmogorovSmirnovTest(unif, shortUniform, false), TOLERANCE);
170         assertTrue(test.kolmogorovSmirnovTest(unif, shortUniform, 0.05));
171         assertEquals(0.6610459, test.kolmogorovSmirnovStatistic(unif, shortUniform), TOLERANCE);
172     }
173 
174     /** Uniform distribution, unit normal dataset */
175     @Test
176     void testOneSampleUniformGaussian() {
177         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
178         final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
179         // Value was obtained via exact test, validated against R. Running exact test takes a long
180         // time.
181         assertEquals(4.9405812774239166E-11, test.kolmogorovSmirnovTest(unif, gaussian, false), TOLERANCE);
182         assertTrue(test.kolmogorovSmirnovTest(unif, gaussian, 0.05));
183         assertEquals(0.3401058049019608, test.kolmogorovSmirnovStatistic(unif, gaussian), TOLERANCE);
184     }
185 
186     /**
187      * Checks D and exact p against expected values.
188      *
189      * @param sample1 test sample
190      * @param sample2 test sample
191      * @param expectedD expected D value
192      * @param expectedP expected P value
193      */
194     private void checkValues(double[] sample1, double[] sample2, double expectedD, double expectedP) {
195         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
196         assertEquals(expectedP, test.kolmogorovSmirnovTest(sample1, sample2, false), TOLERANCE);
197         assertEquals(expectedD, test.kolmogorovSmirnovStatistic(sample1, sample2), TOLERANCE);
198     }
199 
200 
201     // Small sample tests. D values and p-values are checked against R version 3.2.0.
202     // R uses non-strict inequality in null hypothesis.
203     @Test
204     void testTwoSampleSmallSampleExact() {
205         checkValues(
206                 new double[] {6, 7, 9, 13, 19, 21, 22, 23, 24},
207                 new double[] {10, 11, 12, 16, 20, 27, 28, 32, 44, 54},
208                 0.5,
209                 0.105577085453247
210                 );
211     }
212 
213     @Test
214     void testTwoSampleSmallSampleExact2() {
215         checkValues(
216                 new double[] {6, 7, 9, 13, 19, 21, 22, 23, 24, 29, 30, 34, 36, 41, 45, 47, 51, 63, 33, 91},
217                 new double[] {10, 11, 12, 16, 20, 27, 28, 32, 44, 54, 56, 57, 64, 69, 71, 80, 81, 88, 90},
218                 0.4263157895,
219                 0.0462986609
220                 );
221     }
222 
223     @Test
224     void testTwoSampleSmallSampleExact3() {
225         checkValues(
226                 new double[] { -10, -5, 17, 21, 22, 23, 24, 30, 44, 50, 56, 57, 59, 67, 73, 75, 77, 78, 79,
227                         80, 81, 83, 84, 85, 88, 90, 92, 93, 94, 95, 98, 100, 101, 103, 105, 110},
228                 new double[] { -2, -1, 0, 10, 14, 15, 16, 20, 25, 26, 27, 31, 32, 33, 34, 45, 47, 48, 51, 52,
229                         53, 54, 60, 61, 62, 63, 74, 82, 106, 107, 109, 11, 112, 113, 114},
230                 0.41031746031746,
231                 0.00300743602233366
232                 );
233     }
234 
235     @Test
236     void testTwoSampleSmallSampleExact4() {
237         checkValues(
238                 new double[] { 2.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0 },
239                 new double[] { 0.0, 1.0, 3.0, 4.0, 6.0, 8.0, 12.0, 15.0 },
240                 0.41666666666666,
241                 0.351707116412999);
242     }
243 
244     @Test
245     void testTwoSampleSmallSampleExact5() {
246         checkValues(
247                 new double[] {2.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 13.0},
248                 new double[] {0.0, 1.0, 3.0, 7.0, 9.0, 12.0, 14.0, 15.0},
249                 0.25,
250                 0.98010878010878);
251     }
252 
253     @Test
254     void testTwoSampleSmallSampleExact6() {
255         checkValues(
256                 new double[] {0,2},
257                 new double[] {1,3},
258                 0.5,
259                 1.0);
260     }
261 
262     @Test
263     void testTwoSampleSmallSampleExact7() {
264         checkValues(
265                 new double[] {0.0, 2.0, 4.0, 5.0, 7.0, 8.0, 10.0, 12.0},
266                 new double[] {1.0, 3.0, 6.0, 9.0, 11.0},
267                 0.15,
268                 1.0);
269     }
270 
271     /**
272      * Checks exact p-value computations using critical values from Table 9 in V.K Rohatgi, An
273      * Introduction to Probability and Mathematical Statistics, Wiley, 1976, ISBN 0-471-73135-8.
274      */
275     @Test
276     void testTwoSampleExactP() {
277         checkExactTable(4, 6, 5d / 6d, 0.01d);
278         checkExactTable(4, 7, 17d / 28d, 0.2d);
279         checkExactTable(6, 7, 29d / 42d, 0.05d);
280         checkExactTable(4, 10, 7d / 10d, 0.05d);
281         checkExactTable(5, 15, 11d / 15d, 0.02d);
282         checkExactTable(9, 10, 31d / 45d, 0.01d);
283         checkExactTable(7, 10, 43d / 70d, 0.05d);
284     }
285 
286     @Test
287     void testTwoSampleApproximateCritialValues() {
288         final double tol = .01;
289         final double[] alpha = {
290             0.10, 0.05, 0.025, 0.01, 0.005, 0.001
291         };
292         // From Wikipedia KS article - TODO: get (and test) more precise values
293         final double[] c = {
294             1.22, 1.36, 1.48, 1.63, 1.73, 1.95
295         };
296         final int[] k = {
297             60, 100, 500
298         };
299         double n;
300         double m;
301         for (int i = 0; i < k.length; i++) {
302             for (int j = 0; j < i; j++) {
303                 n = k[i];
304                 m = k[j];
305                 for (int l = 0; l < alpha.length; l++) {
306                     final double dCrit = c[l] * FastMath.sqrt((n + m) / (n * m));
307                     checkApproximateTable(k[i], k[j], dCrit, alpha[l], tol);
308                 }
309             }
310         }
311     }
312 
313     @Test
314     void testPelzGoodApproximation() {
315         KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
316         final double[] d = {0.15, 0.20, 0.25, 0.3, 0.35, 0.4};
317         final int[] n = {141, 150, 180, 220, 1000};
318         // Reference values computed using the Pelz method from
319         // http://simul.iro.umontreal.ca/ksdir/KolmogorovSmirnovDist.java
320         final double[] ref = {
321             0.9968940168727819, 0.9979326624184857, 0.9994677598604506, 0.9999128354780209, 0.9999999999998661,
322             0.9999797514476236, 0.9999902122242081, 0.9999991327060908, 0.9999999657681911, 0.9999999999977929,
323             0.9999999706444976, 0.9999999906571532, 0.9999999997949596, 0.999999999998745, 0.9999999999993876,
324             0.9999999999916627, 0.9999999999984447, 0.9999999999999936, 0.999999999999341, 0.9999999999971508,
325             0.9999999999999877, 0.9999999999999191, 0.9999999999999254, 0.9999999999998178, 0.9999999999917788,
326             0.9999999999998556, 0.9999999999992014, 0.9999999999988859, 0.9999999999999325, 0.9999999999821726
327         };
328 
329         final double tol = 10e-15;
330         int k = 0;
331         for (int i = 0; i < 6; i++) {
332             for (int j = 0; j < 5; j++, k++) {
333                 assertEquals(ref[k], ksTest.pelzGood(d[i], n[j]), tol);
334             }
335         }
336     }
337 
338     /** Verifies large sample approximate p values against R */
339     @Test
340     void testTwoSampleApproximateP() {
341         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
342         // Reference values from R, version 2.15.3
343         assertEquals(0.0319983962391632, test.kolmogorovSmirnovTest(gaussian, gaussian2), TOLERANCE);
344         assertEquals(0.202352941176471, test.kolmogorovSmirnovStatistic(gaussian, gaussian2), TOLERANCE);
345     }
346 
347     /**
348      * MATH-1181
349      * Verify that large sample method is selected for sample product &gt; Integer.MAX_VALUE
350      * (integer overflow in sample product)
351      */
352     @Test
353     @Timeout(value = 5000, unit = TimeUnit.MILLISECONDS)
354     void testTwoSampleProductSizeOverflow() {
355         final int n = 50000;
356         assertTrue(n * n < 0);
357         double[] x = new double[n];
358         double[] y = new double[n];
359         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
360         assertFalse(Double.isNaN(test.kolmogorovSmirnovTest(x, y)));
361     }
362 
363     @Test
364     void testTwoSampleWithManyTies() {
365         // MATH-1197
366         final double[] x = {
367             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
368             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
369             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
370             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
371             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
372             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
373             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
374             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
375             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
376             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
377             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
378             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
379             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
380             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
381             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
382             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
383             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
384             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
385             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
386             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
387             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
388             0.000000, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
389             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
390             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
391             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
392             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
393             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
394             3.181199, 3.181199, 3.181199, 3.181199, 3.181199, 3.181199,
395             3.723539, 3.723539, 3.723539, 3.723539, 4.383482, 4.383482,
396             4.383482, 4.383482, 5.320671, 5.320671, 5.320671, 5.717284,
397             6.964001, 7.352165, 8.710510, 8.710510, 8.710510, 8.710510,
398             8.710510, 8.710510, 9.539004, 9.539004, 10.720619, 17.726077,
399             17.726077, 17.726077, 17.726077, 22.053875, 23.799144, 27.355308,
400             30.584960, 30.584960, 30.584960, 30.584960, 30.751808
401         };
402 
403         final double[] y = {
404             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
405             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
406             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
407             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
408             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
409             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
410             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
411             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
412             0.000000, 0.000000, 0.000000, 2.202653, 2.202653, 2.202653,
413             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 3.061758,
414             3.723539, 5.628420, 5.628420, 5.628420, 5.628420, 5.628420,
415             6.916982, 6.916982, 6.916982, 10.178538, 10.178538, 10.178538,
416             10.178538, 10.178538
417         };
418 
419         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
420 
421         assertEquals(0.0640394088, test.kolmogorovSmirnovStatistic(x, y), 1e-6);
422         assertEquals(0.9792777290, test.kolmogorovSmirnovTest(x, y), 1e-6);
423 
424     }
425 
426     @Test
427     void testTwoSamplesAllEqual() {
428         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
429         for (int i = 2; i < 30; ++i) {
430             // testing values with ties
431             double[] values = new double[i];
432             Arrays.fill(values, i);
433             // testing values without ties
434             double[] ascendingValues = new double[i];
435             for (int j = 0; j < ascendingValues.length; j++) {
436                 ascendingValues[j] = j;
437             }
438 
439             assertEquals(0., test.kolmogorovSmirnovStatistic(values, values), 0.);
440             assertEquals(0., test.kolmogorovSmirnovStatistic(ascendingValues, ascendingValues), 0.);
441 
442             if (i < 10) {
443                 assertEquals(1.0, test.exactP(0, values.length, values.length, true), 0.);
444                 assertEquals(1.0, test.exactP(0, values.length, values.length, false), 0.);
445             }
446 
447             assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
448             assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
449         }
450     }
451 
452     /**
453      * JIRA: MATH-1245
454      *
455      * Verify that D-values are not viewed as distinct when they are mathematically equal
456      * when computing p-statistics for small sample tests. Reference values are from R 3.2.0.
457      */
458     @Test
459     void testDRounding() {
460         final double tol = 1e-12;
461         final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
462         final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
463         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
464         assertEquals(0.0027495724090154106, test.kolmogorovSmirnovTest(x, y,false), tol);
465 
466         final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
467         final double[] y1 = {0, 1, 3, 5, 7};
468         assertEquals(0.085914085914085896, test.kolmogorovSmirnovTest(x1, y1, false), tol);
469 
470         final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
471         final double[] y2 = {0, 1, 2, 3, 5};
472         assertEquals(0.015151515151515027, test.kolmogorovSmirnovTest(x2, y2, false), tol);
473     }
474 
475     @Test
476     void testFillBooleanArrayRandomlyWithFixedNumberTrueValues() {
477 
478         final int[][] parameters = {{5, 1}, {5, 2}, {5, 3}, {5, 4}, {8, 1}, {8, 2}, {8, 3}, {8, 4}, {8, 5}, {8, 6}, {8, 7}};
479 
480         final double alpha = 0.001;
481         final int numIterations = 1000000;
482 
483         final RandomGenerator rng = new Well19937c(0);
484 
485         for (final int[] parameter : parameters) {
486 
487             final int arraySize = parameter[0];
488             final int numberOfTrueValues = parameter[1];
489 
490             final boolean[] b = new boolean[arraySize];
491             final long[] counts = new long[1 << arraySize];
492 
493             for (int i = 0; i < numIterations; ++i) {
494                 KolmogorovSmirnovTest.fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, numberOfTrueValues, rng);
495                 int x = 0;
496                 for (int j = 0; j < arraySize; ++j) {
497                     x = ((x << 1) | ((b[j])?1:0));
498                 }
499                 counts[x] += 1;
500             }
501 
502             final int numCombinations = (int) CombinatoricsUtils.binomialCoefficient(arraySize, numberOfTrueValues);
503 
504             final long[] observed = new long[numCombinations];
505             final double[] expected = new double[numCombinations];
506             Arrays.fill(expected, numIterations / (double) numCombinations);
507 
508             int observedIdx = 0;
509 
510             for (int i = 0; i < (1 << arraySize); ++i) {
511                 if (Integer.bitCount(i) == numberOfTrueValues) {
512                     observed[observedIdx] = counts[i];
513                     observedIdx += 1;
514                 }
515                 else {
516                     assertEquals(0, counts[i]);
517                 }
518             }
519 
520             assertEquals(numCombinations, observedIdx);
521             UnitTestUtils.customAssertChiSquareAccept(expected, observed, alpha);
522         }
523     }
524 
525     /**
526      * Test an example with ties in the data.  Reference data is R 3.2.0,
527      * ks.boot implemented in Matching (Version 4.8-3.4, Build Date: 2013/10/28)
528      */
529     @Test
530     void testBootstrapSmallSamplesWithTies() {
531         final double[] x = {0, 2, 4, 6, 8, 8, 10, 15, 22, 30, 33, 36, 38};
532         final double[] y = {9, 17, 20, 33, 40, 51, 60, 60, 72, 90, 101};
533         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(1000);
534         assertEquals(0.0059, test.bootstrap(x, y, 10000, false), 1E-3);
535     }
536 
537     /**
538      * Reference data is R 3.2.0, ks.boot implemented in
539      * Matching (Version 4.8-3.4, Build Date: 2013/10/28)
540      */
541     @Test
542     void testBootstrapLargeSamples() {
543         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(1000);
544         assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000), 1E-2);
545     }
546 
547     /**
548      * Test an example where D-values are close (subject to rounding).
549      * Reference data is R 3.2.0, ks.boot implemented in
550      * Matching (Version 4.8-3.4, Build Date: 2013/10/28)
551      */
552     @Test
553     void testBootstrapRounding() {
554         final double[] x = {2,4,6,8,9,10,11,12,13};
555         final double[] y = {0,1,3,5,7};
556         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(1000);
557         assertEquals(0.06303, test.bootstrap(x, y, 10000, false), 1E-2);
558     }
559 
560     @Test
561     void testFixTiesNoOp() throws Exception {
562         final double[] x = {0, 1, 2, 3, 4};
563         final double[] y = {5, 6, 7, 8};
564         final double[] origX = x.clone();
565         final double[] origY = y.clone();
566         fixTies(x,y);
567         assertArrayEquals(origX, x, 0);
568         assertArrayEquals(origY, y, 0);
569     }
570 
571     /**
572      * Verify that fixTies is deterministic, i.e,
573      * x = x', y = y' ⇒ fixTies(x,y) = fixTies(x', y')
574      */
575     @Test
576     void testFixTiesConsistency() throws Exception {
577         final double[] x = {0, 1, 2, 3, 4, 2};
578         final double[] y = {5, 6, 7, 8, 1, 2};
579         final double[] xP = x.clone();
580         final double[] yP = y.clone();
581         checkFixTies(x, y);
582         final double[] fixedX = x.clone();
583         final double[] fixedY = y.clone();
584         checkFixTies(xP, yP);
585         assertArrayEquals(fixedX, xP, 0);
586         assertArrayEquals(fixedY,  yP, 0);
587     }
588 
589     @Test
590     void testFixTies() throws Exception {
591         checkFixTies(new double[] {0, 1, 1, 4, 0}, new double[] {0, 5, 0.5, 0.55, 7});
592         checkFixTies(new double[] {1, 1, 1, 1, 1}, new double[] {1, 1});
593         checkFixTies(new double[] {1, 2, 3}, new double[] {1});
594         checkFixTies(new double[] {1, 1, 0, 1, 0}, new double[] {});
595     }
596 
597     /**
598      * Checks that fixTies eliminates ties in the data but does not otherwise
599      * perturb the ordering.
600      */
601     private void checkFixTies(double[] x, double[] y) throws Exception {
602         final double[] origCombined = MathArrays.concatenate(x, y);
603         fixTies(x, y);
604         assertFalse(hasTies(x, y));
605         final double[] combined = MathArrays.concatenate(x, y);
606         for (int i = 0; i < combined.length; i++) {
607             for (int j = 0; j < i; j++) {
608                 assertTrue(combined[i] != combined[j]);
609                 if (combined[i] < combined[j])
610                     assertTrue(origCombined[i] < origCombined[j]
611                                           || origCombined[i] == origCombined[j]);
612             }
613 
614         }
615     }
616 
617     /**
618      * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n,
619      * m, false).
620      *
621      * Note that the validity of this check depends on the fact that alpha lies strictly between two
622      * attained values of the distribution and that criticalValue is one of the attained values. The
623      * critical value table (reference below) uses attained values. This test therefore also
624      * verifies that criticalValue is attained.
625      *
626      * @param n first sample size
627      * @param m second sample size
628      * @param criticalValue critical value
629      * @param alpha significance level
630      */
631     private void checkExactTable(int n, int m, double criticalValue, double alpha) {
632         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
633         assertTrue(test.exactP(criticalValue, n, m, true) < alpha);
634         assertTrue(test.exactP(criticalValue, n, m, false) > alpha);
635     }
636 
637     /**
638      * Verifies that approximateP(criticalValue, n, m) is within epsilon of alpha.
639      *
640      * @param n first sample size
641      * @param m second sample size
642      * @param criticalValue critical value (from table)
643      * @param alpha significance level
644      * @param epsilon tolerance
645      */
646     private void checkApproximateTable(int n, int m, double criticalValue, double alpha, double epsilon) {
647         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
648         assertEquals(alpha, test.approximateP(criticalValue, n, m), epsilon);
649     }
650 
651     /**
652      * This is copied directly from version 3.4.1 of commons math. The results of this
653      * method are exact, but the algorithm is slow and scales very badly.  It is not
654      * practical for values of m, n larger than 10.  Versions 3.5 and beyond use a
655      * more efficient method.
656      * <p>
657      * Computes \(P(D_{n,m} > d)\) if {@code strict} is {@code true}; otherwise
658      * \(P(D_{n,m} \ge d)\), where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov
659      * statistic. See {@code KolmogorovSmirnovStatistic} for
660      * the definition of \(D_{n,m}\).
661      * <p>
662      * The returned probability is exact, obtained by enumerating all partitions of
663      * {@code m + n} into {@code m} and {@code n} sets, computing \(D_{n,m}\) for
664      * each partition and counting the number of partitions that yield \(D_{n,m}\)
665      * values exceeding (resp. greater than or equal to) {@code d}.
666      * <p>
667      * <strong>USAGE NOTE</strong>: Since this method enumerates all combinations in
668      * \({m+n} \choose {n}\), it is very slow if called for large {@code m, n}. For
669      * this reason, {@code KolmogorovSmirnovTest)} uses this
670      * only for {@code m * n < } {@code SMALL_SAMPLE_PRODUCT}.
671      *
672      * @param d      D-statistic value
673      * @param n      first sample size
674      * @param m      second sample size
675      * @param strict whether or not the probability to compute is expressed as a
676      *               strict inequality
677      * @return probability that a randomly selected m-n partition of m + n generates
678      *         \(D_{n,m}\) greater than (resp. greater than or equal to) {@code d}
679      */
680      public double exactP341(double d, int n, int m, boolean strict) {
681         Iterator<int[]> combinationsIterator = CombinatoricsUtils.combinationsIterator(n + m, n);
682         long tail = 0;
683         final double[] nSet = new double[n];
684         final double[] mSet = new double[m];
685         while (combinationsIterator.hasNext()) {
686             // Generate an n-set
687             final int[] nSetI = combinationsIterator.next();
688             // Copy the n-set to nSet and its complement to mSet
689             int j = 0;
690             int k = 0;
691             for (int i = 0; i < n + m; i++) {
692                 if (j < n && nSetI[j] == i) {
693                     nSet[j++] = i;
694                 } else {
695                     mSet[k++] = i;
696                 }
697             }
698             final KolmogorovSmirnovTest kStatTest = new KolmogorovSmirnovTest();
699             final double curD = kStatTest.kolmogorovSmirnovStatistic(nSet, mSet);
700             if (curD > d) {
701                 tail++;
702             } else if (curD == d && !strict) {
703                 tail++;
704             }
705         }
706         return (double) tail / (double) CombinatoricsUtils.binomialCoefficient(n + m, n);
707     }
708 
709     /**
710      * Checks exactP for d values ranging from .01 to 1 against brute force
711      * implementation in Commons Math 3.4.1.
712      *
713      * See {@link #exactP341(double, int, int, boolean)}
714      * which duplicates that code.
715      *
716      * The brute force implementation enumerates all n-m partitions and counts
717      * the number with p-values less than d, so it really is exact (but slow).
718      * This test compares current code with the 3.4.1 implementation.
719      *
720      * Set maxSize higher to extend the test to more values. Since the 3.4.1
721      * code is very slow, setting maxSize higher than 8 will make this test
722      * case run a long time.
723      */
724     @Test
725     void testExactP341() throws Exception {
726         final double tol = 1e-12;
727         final int maxSize = 6;
728         final KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
729         for (int m = 2; m < maxSize; m++) {
730             for (int n = 2; n < maxSize; n++) {
731                 double d = 0;
732                 for (int i = 0; i < 100; i++) {
733                     assertEquals(
734                             exactP341(d, m, n, true),
735                             ksTest.exactP(d, m, n, true),
736                             tol);
737                     d +=.01;
738                 }
739             }
740         }
741     }
742 
743     /**
744      * Checks exactP for all actually attained D values against the implementation
745      * in Commons Math 3.4.1. See {@link #exactP341(double, int, int, boolean)}
746      * which duplicates that code.
747      *
748      * The brute force implementation enumerates all n-m partitions and counts the
749      * number with p-values less than d, so it really is exact (but slow). This test
750      * compares current code with the 3.4.1 implementation. Set maxSize higher to
751      * extend the test to more values. Since the 3.4.1 code is very slow, setting
752      * maxSize higher than 8 will make this test case run a long time.
753      */
754     @Test
755     void testExactP341RealD() {
756         final double tol = 1e-12;
757         final int maxSize = 6;
758         for (int m = 2; m < maxSize; m++) {
759             for (int n = 2; n < maxSize; n++ ) {
760                 // Not actually used for the test, but dValues basically stores
761                 // the ks distribution - keys are d values and values are p-values
762                 final Map<Double, Double> dValues = new TreeMap<>();
763                 final Iterator<int[]> combinationsIterator = CombinatoricsUtils.combinationsIterator(n + m, n);
764                 final double[] nSet = new double[n];
765                 final double[] mSet = new double[m];
766                 while (combinationsIterator.hasNext()) {
767                     // Generate an n-set
768                     final int[] nSetI = combinationsIterator.next();
769                     // Copy the n-set to nSet and its complement to mSet
770                     int j = 0;
771                     int k = 0;
772                     for (int i = 0; i < n + m; i++) {
773                         if (j < n && nSetI[j] == i) {
774                             nSet[j++] = i;
775                         } else {
776                             mSet[k++] = i;
777                         }
778                     }
779                     final KolmogorovSmirnovTest kStatTest = new KolmogorovSmirnovTest();
780                     final double curD = kStatTest.kolmogorovSmirnovStatistic(nSet, mSet);
781                     final double curP = kStatTest.exactP(curD, m, n, true);
782                     dValues.putIfAbsent(curD, curP);
783                     assertEquals(
784                             exactP341(curD, m, n, true),
785                             kStatTest.exactP(curD, m, n, true),
786                             tol);
787                 }
788             }
789         }
790         // Add code to display / persist dValues here if desired
791     }
792 
793     /**
794      * Reflection hack to expose private fixTies method for testing.
795      */
796     private static void fixTies(double[] x, double[] y) throws Exception {
797         Method method = KolmogorovSmirnovTest.class.getDeclaredMethod("fixTies",
798                                              double[].class, double[].class);
799         method.setAccessible(true);
800         KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
801         method.invoke(ksTest, x, y);
802     }
803 
804     /**
805      * Reflection hack to expose private hasTies method.
806      */
807     private static boolean hasTies(double[] x, double[] y) throws Exception {
808         Method method = KolmogorovSmirnovTest.class.getDeclaredMethod("hasTies",
809                                                double[].class, double[].class);
810         method.setAccessible(true);
811         return (boolean) method.invoke(KolmogorovSmirnovTest.class, x, y);
812     }
813 
814 }