View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.stat.inference;
24  
25  import java.lang.reflect.Method;
26  import java.util.Arrays;
27  import java.util.Iterator;
28  import java.util.Map;
29  import java.util.TreeMap;
30  
31  import org.hipparchus.UnitTestUtils;
32  import org.hipparchus.distribution.continuous.NormalDistribution;
33  import org.hipparchus.distribution.continuous.UniformRealDistribution;
34  import org.hipparchus.random.RandomGenerator;
35  import org.hipparchus.random.Well19937c;
36  import org.hipparchus.util.CombinatoricsUtils;
37  import org.hipparchus.util.FastMath;
38  import org.hipparchus.util.MathArrays;
39  import org.junit.Assert;
40  import org.junit.Test;
41  
42  /**
43   * Test cases for {@link KolmogorovSmirnovTest}.
44   *
45   */
46  public class KolmogorovSmirnovTestTest {
47  
48      protected static final double TOLERANCE = 10e-10;
49  
50      // Random N(0,1) values generated using R rnorm
51      protected static final double[] gaussian = {
52          0.26055895, -0.63665233, 1.51221323, 0.61246988, -0.03013003, -1.73025682, -0.51435805, 0.70494168, 0.18242945,
53          0.94734336, -0.04286604, -0.37931719, -1.07026403, -2.05861425, 0.11201862, 0.71400136, -0.52122185,
54          -0.02478725, -1.86811649, -1.79907688, 0.15046279, 1.32390193, 1.55889719, 1.83149171, -0.03948003,
55          -0.98579207, -0.76790540, 0.89080682, 0.19532153, 0.40692841, 0.15047336, -0.58546562, -0.39865469, 0.77604271,
56          -0.65188221, -1.80368554, 0.65273365, -0.75283102, -1.91022150, -0.07640869, -1.08681188, -0.89270600,
57          2.09017508, 0.43907981, 0.10744033, -0.70961218, 1.15707300, 0.44560525, -2.04593349, 0.53816843, -0.08366640,
58          0.24652218, 1.80549401, -0.99220707, -1.14589408, -0.27170290, -0.49696855, 0.00968353, -1.87113545,
59          -1.91116529, 0.97151891, -0.73576115, -0.59437029, 0.72148436, 0.01747695, -0.62601157, -1.00971538,
60          -1.42691397, 1.03250131, -0.30672627, -0.15353992, -1.19976069, -0.68364218, 0.37525652, -0.46592881,
61          -0.52116168, -0.17162202, 1.04679215, 0.25165971, -0.04125231, -0.23756244, -0.93389975, 0.75551407,
62          0.08347445, -0.27482228, -0.4717632, -0.1867746, -0.1166976, 0.5763333, 0.1307952, 0.7630584, -0.3616248,
63          2.1383790, -0.7946630, 0.0231885, 0.7919195, 1.6057144, -0.3802508, 0.1229078, 1.5252901, -0.8543149, 0.3025040
64      };
65  
66      // Random N(0, 1.6) values generated using R rnorm
67      protected static final double[] gaussian2 = {
68          2.88041498038308, -0.632349445671017, 0.402121295225571, 0.692626364613243, 1.30693446815426,
69          -0.714176317131286, -0.233169206599583, 1.09113298322107, -1.53149079994305, 1.23259966205809,
70          1.01389927412503, 0.0143898711497477, -0.512813545447559, 2.79364360835469, 0.662008875538092,
71          1.04861546834788, -0.321280099931466, 0.250296656278743, 1.75820367603736, -2.31433523590905,
72          -0.462694696086403, 0.187725700950191, -2.24410950019152, 2.83473751105445, 0.252460174391016,
73          1.39051945380281, -1.56270144203134, 0.998522814471644, -1.50147469080896, 0.145307533554146,
74          0.469089457043406, -0.0914780723809334, -0.123446939266548, -0.610513388160565, -3.71548343891957,
75          -0.329577317349478, -0.312973794075871, 2.02051909758923, 2.85214308266271, 0.0193222002327237,
76          -0.0322422268266562, 0.514736012106768, 0.231484953375887, -2.22468798953629, 1.42197716075595,
77          2.69988043856357, 0.0443757119128293, 0.721536984407798, -0.0445688839903234, -0.294372724550705,
78          0.234041580912698, -0.868973119365727, 1.3524893453845, -0.931054600134503, -0.263514296006792,
79          0.540949457402918, -0.882544288773685, -0.34148675747989, 1.56664494810034, 2.19850536566584,
80          -0.667972122928022, -0.70889669526203, -0.00251758193079668, 2.39527162977682, -2.7559594317269,
81          -0.547393502656671, -2.62144031572617, 2.81504147017922, -1.02036850201042, -1.00713927602786,
82          -0.520197775122254, 1.00625480138649, 2.46756916531313, 1.64364743727799, 0.704545210648595,
83          -0.425885789416992, -1.78387854908546, -0.286783886710481, 0.404183648369076, -0.369324280845769,
84          -0.0391185138840443, 2.41257787857293, 2.49744281317859, -0.826964496939021, -0.792555379958975,
85          1.81097685787403, -0.475014580016638, 1.23387615291805, 0.646615294802053, 1.88496377454523, 1.20390698380814,
86          -0.27812153371728, 2.50149494533101, 0.406964323253817, -1.72253451309982, 1.98432494184332, 2.2223658560333,
87          0.393086362404685, -0.504073151377089, -0.0484610869883821
88      };
89  
90      // Random uniform (0, 1) generated using R runif
91      protected static final double[] uniform = {
92          0.7930305, 0.6424382, 0.8747699, 0.7156518, 0.1845909, 0.2022326, 0.4877206, 0.8928752, 0.2293062, 0.4222006,
93          0.1610459, 0.2830535, 0.9946345, 0.7329499, 0.26411126, 0.87958133, 0.29827437, 0.39185988, 0.38351185,
94          0.36359611, 0.48646472, 0.05577866, 0.56152250, 0.52672013, 0.13171783, 0.95864085, 0.03060207, 0.33514887,
95          0.72508148, 0.38901437, 0.9978665, 0.5981300, 0.1065388, 0.7036991, 0.1071584, 0.4423963, 0.1107071, 0.6437221,
96          0.58523872, 0.05044634, 0.65999539, 0.37367260, 0.73270024, 0.47473755, 0.74661163, 0.50765549, 0.05377347,
97          0.40998009, 0.55235182, 0.21361998, 0.63117971, 0.18109222, 0.89153510, 0.23203248, 0.6177106, 0.6856418,
98          0.2158557, 0.9870501, 0.2036914, 0.2100311, 0.9065020, 0.7459159, 0.56631790, 0.06753629, 0.39684629,
99          0.52504615, 0.14199103, 0.78551120, 0.90503321, 0.80452362, 0.9960115, 0.8172592, 0.5831134, 0.8794187,
100         0.2021501, 0.2923505, 0.9561824, 0.8792248, 0.85201008, 0.02945562, 0.26200374, 0.11382818, 0.17238856,
101         0.36449473, 0.69688273, 0.96216330, 0.4859432, 0.4503438, 0.1917656, 0.8357845, 0.9957812, 0.4633570,
102         0.8654599, 0.4597996, 0.68190289, 0.58887855, 0.09359396, 0.98081979, 0.73659533, 0.89344777, 0.18903099,
103         0.97660425
104     };
105 
106     /** Unit normal distribution, unit normal data */
107     @Test
108     public void testOneSampleGaussianGaussian() {
109         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
110         final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
111         // Uncomment to run exact test - takes about a minute. Same value is used in R tests and for
112         // approx.
113         // Assert.assertEquals(0.3172069207622391, test.kolmogorovSmirnovTest(unitNormal, gaussian,
114         // true), TOLERANCE);
115         Assert.assertEquals(0.3172069207622391, test.kolmogorovSmirnovTest(unitNormal, gaussian, false), TOLERANCE);
116         Assert.assertFalse(test.kolmogorovSmirnovTest(unitNormal, gaussian, 0.05));
117         Assert.assertEquals(0.0932947561266756, test.kolmogorovSmirnovStatistic(unitNormal, gaussian), TOLERANCE);
118     }
119 
120     /** Unit normal distribution, unit normal data, small dataset */
121     @Test
122     public void testOneSampleGaussianGaussianSmallSample() {
123         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
124         final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
125         final double[] shortGaussian = new double[50];
126         System.arraycopy(gaussian, 0, shortGaussian, 0, 50);
127         Assert.assertEquals(0.683736463728347, test.kolmogorovSmirnovTest(unitNormal, shortGaussian, false), TOLERANCE);
128         Assert.assertFalse(test.kolmogorovSmirnovTest(unitNormal, gaussian, 0.05));
129         Assert.assertEquals(0.09820779969463278, test.kolmogorovSmirnovStatistic(unitNormal, shortGaussian), TOLERANCE);
130     }
131 
132     /** Unit normal distribution, uniform data */
133     @Test
134     public void testOneSampleGaussianUniform() {
135         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
136         final NormalDistribution unitNormal = new NormalDistribution(0d, 1d);
137         // Uncomment to run exact test - takes a long time. Same value is used in R tests and for
138         // approx.
139         // Assert.assertEquals(0.3172069207622391, test.kolmogorovSmirnovTest(unitNormal, uniform,
140         // true), TOLERANCE);
141         Assert.assertEquals(8.881784197001252E-16, test.kolmogorovSmirnovTest(unitNormal, uniform, false), TOLERANCE);
142         Assert.assertFalse(test.kolmogorovSmirnovTest(unitNormal, gaussian, 0.05));
143         Assert.assertEquals(0.5117493931609258, test.kolmogorovSmirnovStatistic(unitNormal, uniform), TOLERANCE);
144     }
145 
146     /** Uniform distribution, uniform data */
147     // @Test - takes about 6 seconds, uncomment for
148     public void testOneSampleUniformUniform() {
149         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
150         final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
151         Assert.assertEquals(8.881784197001252E-16, test.kolmogorovSmirnovTest(unif, uniform, false), TOLERANCE);
152         Assert.assertTrue(test.kolmogorovSmirnovTest(unif, uniform, 0.05));
153         Assert.assertEquals(0.5400666982352942, test.kolmogorovSmirnovStatistic(unif, uniform), TOLERANCE);
154     }
155 
156     /** Uniform distribution, uniform data, small dataset */
157     @Test
158     public void testOneSampleUniformUniformSmallSample() {
159         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
160         final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
161         final double[] shortUniform = new double[20];
162         System.arraycopy(uniform, 0, shortUniform, 0, 20);
163         Assert.assertEquals(4.117594598618268E-9, test.kolmogorovSmirnovTest(unif, shortUniform, false), TOLERANCE);
164         Assert.assertTrue(test.kolmogorovSmirnovTest(unif, shortUniform, 0.05));
165         Assert.assertEquals(0.6610459, test.kolmogorovSmirnovStatistic(unif, shortUniform), TOLERANCE);
166     }
167 
168     /** Uniform distribution, unit normal dataset */
169     @Test
170     public void testOneSampleUniformGaussian() {
171         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
172         final UniformRealDistribution unif = new UniformRealDistribution(-0.5, 0.5);
173         // Value was obtained via exact test, validated against R. Running exact test takes a long
174         // time.
175         Assert.assertEquals(4.9405812774239166E-11, test.kolmogorovSmirnovTest(unif, gaussian, false), TOLERANCE);
176         Assert.assertTrue(test.kolmogorovSmirnovTest(unif, gaussian, 0.05));
177         Assert.assertEquals(0.3401058049019608, test.kolmogorovSmirnovStatistic(unif, gaussian), TOLERANCE);
178     }
179 
180     /**
181      * Checks D and exact p against expected values.
182      *
183      * @param first test sample
184      * @param second test sample
185      * @param expectedD expected D value
186      * @param expectedP expected P value
187      */
188     private void checkValues(double[] sample1, double[] sample2, double expectedD, double expectedP) {
189         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
190         Assert
191             .assertEquals(expectedP, test.kolmogorovSmirnovTest(sample1, sample2, false), TOLERANCE);
192         Assert.assertEquals(expectedD, test.kolmogorovSmirnovStatistic(sample1, sample2), TOLERANCE);
193     }
194 
195 
196     // Small sample tests. D values and p-values are checked against R version 3.2.0.
197     // R uses non-strict inequality in null hypothesis.
198     @Test
199     public void testTwoSampleSmallSampleExact() {
200         checkValues(
201                 new double[] {6, 7, 9, 13, 19, 21, 22, 23, 24},
202                 new double[] {10, 11, 12, 16, 20, 27, 28, 32, 44, 54},
203                 0.5,
204                 0.105577085453247
205                 );
206     }
207 
208     @Test
209     public void testTwoSampleSmallSampleExact2() {
210         checkValues(
211                 new double[] {6, 7, 9, 13, 19, 21, 22, 23, 24, 29, 30, 34, 36, 41, 45, 47, 51, 63, 33, 91},
212                 new double[] {10, 11, 12, 16, 20, 27, 28, 32, 44, 54, 56, 57, 64, 69, 71, 80, 81, 88, 90},
213                 0.4263157895,
214                 0.0462986609
215                 );
216     }
217 
218     @Test
219     public void testTwoSampleSmallSampleExact3() {
220         checkValues(
221                 new double[] { -10, -5, 17, 21, 22, 23, 24, 30, 44, 50, 56, 57, 59, 67, 73, 75, 77, 78, 79,
222                         80, 81, 83, 84, 85, 88, 90, 92, 93, 94, 95, 98, 100, 101, 103, 105, 110},
223                 new double[] { -2, -1, 0, 10, 14, 15, 16, 20, 25, 26, 27, 31, 32, 33, 34, 45, 47, 48, 51, 52,
224                         53, 54, 60, 61, 62, 63, 74, 82, 106, 107, 109, 11, 112, 113, 114},
225                 0.41031746031746,
226                 0.00300743602233366
227                 );
228     }
229 
230     @Test
231     public void testTwoSampleSmallSampleExact4() {
232         checkValues(
233                 new double[] { 2.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 14.0, 16.0 },
234                 new double[] { 0.0, 1.0, 3.0, 4.0, 6.0, 8.0, 12.0, 15.0 },
235                 0.41666666666666,
236                 0.351707116412999);
237     }
238 
239     @Test
240     public void testTwoSampleSmallSampleExact5() {
241         checkValues(
242                 new double[] {2.0, 4.0, 5.0, 6.0, 8.0, 10.0, 11.0, 13.0},
243                 new double[] {0.0, 1.0, 3.0, 7.0, 9.0, 12.0, 14.0, 15.0},
244                 0.25,
245                 0.98010878010878);
246     }
247 
248     @Test
249     public void testTwoSampleSmallSampleExact6() {
250         checkValues(
251                 new double[] {0,2},
252                 new double[] {1,3},
253                 0.5,
254                 1.0);
255     }
256 
257     @Test
258     public void testTwoSampleSmallSampleExact7() {
259         checkValues(
260                 new double[] {0.0, 2.0, 4.0, 5.0, 7.0, 8.0, 10.0, 12.0},
261                 new double[] {1.0, 3.0, 6.0, 9.0, 11.0},
262                 0.15,
263                 1.0);
264     }
265 
266     /**
267      * Checks exact p-value computations using critical values from Table 9 in V.K Rohatgi, An
268      * Introduction to Probability and Mathematical Statistics, Wiley, 1976, ISBN 0-471-73135-8.
269      */
270     @Test
271     public void testTwoSampleExactP() {
272         checkExactTable(4, 6, 5d / 6d, 0.01d);
273         checkExactTable(4, 7, 17d / 28d, 0.2d);
274         checkExactTable(6, 7, 29d / 42d, 0.05d);
275         checkExactTable(4, 10, 7d / 10d, 0.05d);
276         checkExactTable(5, 15, 11d / 15d, 0.02d);
277         checkExactTable(9, 10, 31d / 45d, 0.01d);
278         checkExactTable(7, 10, 43d / 70d, 0.05d);
279     }
280 
281     @Test
282     public void testTwoSampleApproximateCritialValues() {
283         final double tol = .01;
284         final double[] alpha = {
285             0.10, 0.05, 0.025, 0.01, 0.005, 0.001
286         };
287         // From Wikipedia KS article - TODO: get (and test) more precise values
288         final double[] c = {
289             1.22, 1.36, 1.48, 1.63, 1.73, 1.95
290         };
291         final int k[] = {
292             60, 100, 500
293         };
294         double n;
295         double m;
296         for (int i = 0; i < k.length; i++) {
297             for (int j = 0; j < i; j++) {
298                 n = k[i];
299                 m = k[j];
300                 for (int l = 0; l < alpha.length; l++) {
301                     final double dCrit = c[l] * FastMath.sqrt((n + m) / (n * m));
302                     checkApproximateTable(k[i], k[j], dCrit, alpha[l], tol);
303                 }
304             }
305         }
306     }
307 
308     @Test
309     public void testPelzGoodApproximation() {
310         KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
311         final double d[] = {0.15, 0.20, 0.25, 0.3, 0.35, 0.4};
312         final int n[] = {141, 150, 180, 220, 1000};
313         // Reference values computed using the Pelz method from
314         // http://simul.iro.umontreal.ca/ksdir/KolmogorovSmirnovDist.java
315         final double ref[] = {
316             0.9968940168727819, 0.9979326624184857, 0.9994677598604506, 0.9999128354780209, 0.9999999999998661,
317             0.9999797514476236, 0.9999902122242081, 0.9999991327060908, 0.9999999657681911, 0.9999999999977929,
318             0.9999999706444976, 0.9999999906571532, 0.9999999997949596, 0.999999999998745, 0.9999999999993876,
319             0.9999999999916627, 0.9999999999984447, 0.9999999999999936, 0.999999999999341, 0.9999999999971508,
320             0.9999999999999877, 0.9999999999999191, 0.9999999999999254, 0.9999999999998178, 0.9999999999917788,
321             0.9999999999998556, 0.9999999999992014, 0.9999999999988859, 0.9999999999999325, 0.9999999999821726
322         };
323 
324         final double tol = 10e-15;
325         int k = 0;
326         for (int i = 0; i < 6; i++) {
327             for (int j = 0; j < 5; j++, k++) {
328                 Assert.assertEquals(ref[k], ksTest.pelzGood(d[i], n[j]), tol);
329             }
330         }
331     }
332 
333     /** Verifies large sample approximate p values against R */
334     @Test
335     public void testTwoSampleApproximateP() {
336         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
337         // Reference values from R, version 2.15.3
338         Assert.assertEquals(0.0319983962391632, test.kolmogorovSmirnovTest(gaussian, gaussian2), TOLERANCE);
339         Assert.assertEquals(0.202352941176471, test.kolmogorovSmirnovStatistic(gaussian, gaussian2), TOLERANCE);
340     }
341 
342     /**
343      * MATH-1181
344      * Verify that large sample method is selected for sample product &gt; Integer.MAX_VALUE
345      * (integer overflow in sample product)
346      */
347     @Test(timeout=5000)
348     public void testTwoSampleProductSizeOverflow() {
349         final int n = 50000;
350         Assert.assertTrue(n * n < 0);
351         double[] x = new double[n];
352         double[] y = new double[n];
353         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
354         Assert.assertFalse(Double.isNaN(test.kolmogorovSmirnovTest(x, y)));
355     }
356 
357     @Test
358     public void testTwoSampleWithManyTies() {
359         // MATH-1197
360         final double[] x = {
361             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
362             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
363             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
364             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
365             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
366             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
367             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
368             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
369             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
370             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
371             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
372             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
373             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
374             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
375             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
376             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
377             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
378             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
379             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
380             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
381             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
382             0.000000, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
383             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
384             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
385             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
386             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
387             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 2.202653,
388             3.181199, 3.181199, 3.181199, 3.181199, 3.181199, 3.181199,
389             3.723539, 3.723539, 3.723539, 3.723539, 4.383482, 4.383482,
390             4.383482, 4.383482, 5.320671, 5.320671, 5.320671, 5.717284,
391             6.964001, 7.352165, 8.710510, 8.710510, 8.710510, 8.710510,
392             8.710510, 8.710510, 9.539004, 9.539004, 10.720619, 17.726077,
393             17.726077, 17.726077, 17.726077, 22.053875, 23.799144, 27.355308,
394             30.584960, 30.584960, 30.584960, 30.584960, 30.751808
395         };
396 
397         final double[] y = {
398             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
399             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
400             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
401             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
402             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
403             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
404             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
405             0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
406             0.000000, 0.000000, 0.000000, 2.202653, 2.202653, 2.202653,
407             2.202653, 2.202653, 2.202653, 2.202653, 2.202653, 3.061758,
408             3.723539, 5.628420, 5.628420, 5.628420, 5.628420, 5.628420,
409             6.916982, 6.916982, 6.916982, 10.178538, 10.178538, 10.178538,
410             10.178538, 10.178538
411         };
412 
413         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
414 
415         Assert.assertEquals(0.0640394088, test.kolmogorovSmirnovStatistic(x, y), 1e-6);
416         Assert.assertEquals(0.9792777290, test.kolmogorovSmirnovTest(x, y), 1e-6);
417 
418     }
419 
420     @Test
421     public void testTwoSamplesAllEqual() {
422         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
423         for (int i = 2; i < 30; ++i) {
424             // testing values with ties
425             double[] values = new double[i];
426             Arrays.fill(values, i);
427             // testing values without ties
428             double[] ascendingValues = new double[i];
429             for (int j = 0; j < ascendingValues.length; j++) {
430                 ascendingValues[j] = j;
431             }
432 
433             Assert.assertEquals(0., test.kolmogorovSmirnovStatistic(values, values), 0.);
434             Assert.assertEquals(0., test.kolmogorovSmirnovStatistic(ascendingValues, ascendingValues), 0.);
435 
436             if (i < 10) {
437                 Assert.assertEquals(1.0, test.exactP(0, values.length, values.length, true), 0.);
438                 Assert.assertEquals(1.0, test.exactP(0, values.length, values.length, false), 0.);
439             }
440 
441             Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
442             Assert.assertEquals(1.0, test.approximateP(0, values.length, values.length), 0.);
443         }
444     }
445 
446     /**
447      * JIRA: MATH-1245
448      *
449      * Verify that D-values are not viewed as distinct when they are mathematically equal
450      * when computing p-statistics for small sample tests. Reference values are from R 3.2.0.
451      */
452     @Test
453     public void testDRounding() {
454         final double tol = 1e-12;
455         final double[] x = {0, 2, 3, 4, 5, 6, 7, 8, 9, 12};
456         final double[] y = {1, 10, 11, 13, 14, 15, 16, 17, 18};
457         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
458         Assert.assertEquals(0.0027495724090154106, test.kolmogorovSmirnovTest(x, y,false), tol);
459 
460         final double[] x1 = {2, 4, 6, 8, 9, 10, 11, 12, 13};
461         final double[] y1 = {0, 1, 3, 5, 7};
462         Assert.assertEquals(0.085914085914085896, test.kolmogorovSmirnovTest(x1, y1, false), tol);
463 
464         final double[] x2 = {4, 6, 7, 8, 9, 10, 11};
465         final double[] y2 = {0, 1, 2, 3, 5};
466         Assert.assertEquals(0.015151515151515027, test.kolmogorovSmirnovTest(x2, y2, false), tol);
467     }
468 
469     @Test
470     public void testFillBooleanArrayRandomlyWithFixedNumberTrueValues() {
471 
472         final int[][] parameters = {{5, 1}, {5, 2}, {5, 3}, {5, 4}, {8, 1}, {8, 2}, {8, 3}, {8, 4}, {8, 5}, {8, 6}, {8, 7}};
473 
474         final double alpha = 0.001;
475         final int numIterations = 1000000;
476 
477         final RandomGenerator rng = new Well19937c(0);
478 
479         for (final int[] parameter : parameters) {
480 
481             final int arraySize = parameter[0];
482             final int numberOfTrueValues = parameter[1];
483 
484             final boolean[] b = new boolean[arraySize];
485             final long[] counts = new long[1 << arraySize];
486 
487             for (int i = 0; i < numIterations; ++i) {
488                 KolmogorovSmirnovTest.fillBooleanArrayRandomlyWithFixedNumberTrueValues(b, numberOfTrueValues, rng);
489                 int x = 0;
490                 for (int j = 0; j < arraySize; ++j) {
491                     x = ((x << 1) | ((b[j])?1:0));
492                 }
493                 counts[x] += 1;
494             }
495 
496             final int numCombinations = (int) CombinatoricsUtils.binomialCoefficient(arraySize, numberOfTrueValues);
497 
498             final long[] observed = new long[numCombinations];
499             final double[] expected = new double[numCombinations];
500             Arrays.fill(expected, numIterations / (double) numCombinations);
501 
502             int observedIdx = 0;
503 
504             for (int i = 0; i < (1 << arraySize); ++i) {
505                 if (Integer.bitCount(i) == numberOfTrueValues) {
506                     observed[observedIdx] = counts[i];
507                     observedIdx += 1;
508                 }
509                 else {
510                     Assert.assertEquals(0, counts[i]);
511                 }
512             }
513 
514             Assert.assertEquals(numCombinations, observedIdx);
515             UnitTestUtils.assertChiSquareAccept(expected, observed, alpha);
516         }
517     }
518 
519     /**
520      * Test an example with ties in the data.  Reference data is R 3.2.0,
521      * ks.boot implemented in Matching (Version 4.8-3.4, Build Date: 2013/10/28)
522      */
523     @Test
524     public void testBootstrapSmallSamplesWithTies() {
525         final double[] x = {0, 2, 4, 6, 8, 8, 10, 15, 22, 30, 33, 36, 38};
526         final double[] y = {9, 17, 20, 33, 40, 51, 60, 60, 72, 90, 101};
527         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(1000);
528         Assert.assertEquals(0.0059, test.bootstrap(x, y, 10000, false), 1E-3);
529     }
530 
531     /**
532      * Reference data is R 3.2.0, ks.boot implemented in
533      * Matching (Version 4.8-3.4, Build Date: 2013/10/28)
534      */
535     @Test
536     public void testBootstrapLargeSamples() {
537         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(1000);
538         Assert.assertEquals(0.0237, test.bootstrap(gaussian, gaussian2, 10000), 1E-2);
539     }
540 
541     /**
542      * Test an example where D-values are close (subject to rounding).
543      * Reference data is R 3.2.0, ks.boot implemented in
544      * Matching (Version 4.8-3.4, Build Date: 2013/10/28)
545      */
546     @Test
547     public void testBootstrapRounding() {
548         final double[] x = {2,4,6,8,9,10,11,12,13};
549         final double[] y = {0,1,3,5,7};
550         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest(1000);
551         Assert.assertEquals(0.06303, test.bootstrap(x, y, 10000, false), 1E-2);
552     }
553 
554     @Test
555     public void testFixTiesNoOp() throws Exception {
556         final double[] x = {0, 1, 2, 3, 4};
557         final double[] y = {5, 6, 7, 8};
558         final double[] origX = x.clone();
559         final double[] origY = y.clone();
560         fixTies(x,y);
561         Assert.assertArrayEquals(origX, x, 0);
562         Assert.assertArrayEquals(origY, y, 0);
563     }
564 
565     /**
566      * Verify that fixTies is deterministic, i.e,
567      * x = x', y = y' ⇒ fixTies(x,y) = fixTies(x', y')
568      */
569     @Test
570     public void testFixTiesConsistency() throws Exception {
571         final double[] x = {0, 1, 2, 3, 4, 2};
572         final double[] y = {5, 6, 7, 8, 1, 2};
573         final double[] xP = x.clone();
574         final double[] yP = y.clone();
575         checkFixTies(x, y);
576         final double[] fixedX = x.clone();
577         final double[] fixedY = y.clone();
578         checkFixTies(xP, yP);
579         Assert.assertArrayEquals(fixedX, xP, 0);
580         Assert.assertArrayEquals(fixedY,  yP, 0);
581     }
582 
583     @Test
584     public void testFixTies() throws Exception {
585         checkFixTies(new double[] {0, 1, 1, 4, 0}, new double[] {0, 5, 0.5, 0.55, 7});
586         checkFixTies(new double[] {1, 1, 1, 1, 1}, new double[] {1, 1});
587         checkFixTies(new double[] {1, 2, 3}, new double[] {1});
588         checkFixTies(new double[] {1, 1, 0, 1, 0}, new double[] {});
589     }
590 
591     /**
592      * Checks that fixTies eliminates ties in the data but does not otherwise
593      * perturb the ordering.
594      */
595     private void checkFixTies(double[] x, double[] y) throws Exception {
596         final double[] origCombined = MathArrays.concatenate(x, y);
597         fixTies(x, y);
598         Assert.assertFalse(hasTies(x, y));
599         final double[] combined = MathArrays.concatenate(x, y);
600         for (int i = 0; i < combined.length; i++) {
601             for (int j = 0; j < i; j++) {
602                 Assert.assertTrue(combined[i] != combined[j]);
603                 if (combined[i] < combined[j])
604                     Assert.assertTrue(origCombined[i] < origCombined[j]
605                                           || origCombined[i] == origCombined[j]);
606             }
607 
608         }
609     }
610 
611     /**
612      * Verifies the inequality exactP(criticalValue, n, m, true) < alpha < exactP(criticalValue, n,
613      * m, false).
614      *
615      * Note that the validity of this check depends on the fact that alpha lies strictly between two
616      * attained values of the distribution and that criticalValue is one of the attained values. The
617      * critical value table (reference below) uses attained values. This test therefore also
618      * verifies that criticalValue is attained.
619      *
620      * @param n first sample size
621      * @param m second sample size
622      * @param criticalValue critical value
623      * @param alpha significance level
624      */
625     private void checkExactTable(int n, int m, double criticalValue, double alpha) {
626         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
627         Assert.assertTrue(test.exactP(criticalValue, n, m, true) < alpha);
628         Assert.assertTrue(test.exactP(criticalValue, n, m, false) > alpha);
629     }
630 
631     /**
632      * Verifies that approximateP(criticalValue, n, m) is within epsilon of alpha.
633      *
634      * @param n first sample size
635      * @param m second sample size
636      * @param criticalValue critical value (from table)
637      * @param alpha significance level
638      * @param epsilon tolerance
639      */
640     private void checkApproximateTable(int n, int m, double criticalValue, double alpha, double epsilon) {
641         final KolmogorovSmirnovTest test = new KolmogorovSmirnovTest();
642         Assert.assertEquals(alpha, test.approximateP(criticalValue, n, m), epsilon);
643     }
644 
645     /**
646      * This is copied directly from version 3.4.1 of commons math. The results of this
647      * method are exact, but the algorithm is slow and scales very badly.  It is not
648      * practical for values of m, n larger than 10.  Versions 3.5 and beyond use a
649      * more efficient method.
650      * <p>
651      * Computes \(P(D_{n,m} > d)\) if {@code strict} is {@code true}; otherwise
652      * \(P(D_{n,m} \ge d)\), where \(D_{n,m}\) is the 2-sample Kolmogorov-Smirnov
653      * statistic. See {@link #kolmogorovSmirnovStatistic(double[], double[])} for
654      * the definition of \(D_{n,m}\).
655      * <p>
656      * The returned probability is exact, obtained by enumerating all partitions of
657      * {@code m + n} into {@code m} and {@code n} sets, computing \(D_{n,m}\) for
658      * each partition and counting the number of partitions that yield \(D_{n,m}\)
659      * values exceeding (resp. greater than or equal to) {@code d}.
660      * <p>
661      * <strong>USAGE NOTE</strong>: Since this method enumerates all combinations in
662      * \({m+n} \choose {n}\), it is very slow if called for large {@code m, n}. For
663      * this reason, {@link #kolmogorovSmirnovTest(double[], double[])} uses this
664      * only for {@code m * n < } {@value #SMALL_SAMPLE_PRODUCT}.
665      *
666      * @param d      D-statistic value
667      * @param n      first sample size
668      * @param m      second sample size
669      * @param strict whether or not the probability to compute is expressed as a
670      *               strict inequality
671      * @return probability that a randomly selected m-n partition of m + n generates
672      *         \(D_{n,m}\) greater than (resp. greater than or equal to) {@code d}
673      */
674      public double exactP341(double d, int n, int m, boolean strict) {
675         Iterator<int[]> combinationsIterator = CombinatoricsUtils.combinationsIterator(n + m, n);
676         long tail = 0;
677         final double[] nSet = new double[n];
678         final double[] mSet = new double[m];
679         while (combinationsIterator.hasNext()) {
680             // Generate an n-set
681             final int[] nSetI = combinationsIterator.next();
682             // Copy the n-set to nSet and its complement to mSet
683             int j = 0;
684             int k = 0;
685             for (int i = 0; i < n + m; i++) {
686                 if (j < n && nSetI[j] == i) {
687                     nSet[j++] = i;
688                 } else {
689                     mSet[k++] = i;
690                 }
691             }
692             final KolmogorovSmirnovTest kStatTest = new KolmogorovSmirnovTest();
693             final double curD = kStatTest.kolmogorovSmirnovStatistic(nSet, mSet);
694             if (curD > d) {
695                 tail++;
696             } else if (curD == d && !strict) {
697                 tail++;
698             }
699         }
700         return (double) tail / (double) CombinatoricsUtils.binomialCoefficient(n + m, n);
701     }
702 
703     /**
704      * Checks exactP for d values ranging from .01 to 1 against brute force
705      * implementation in Commons Math 3.4.1.
706      *
707      * See {@link #exactP341(double, int, int, boolean)}
708      * which duplicates that code.
709      *
710      * The brute force implementation enumerates all n-m partitions and counts
711      * the number with p-values less than d, so it really is exact (but slow).
712      * This test compares current code with the 3.4.1 implementation.
713      *
714      * Set maxSize higher to extend the test to more values. Since the 3.4.1
715      * code is very slow, setting maxSize higher than 8 will make this test
716      * case run a long time.
717      */
718     @Test
719     public void testExactP341() throws Exception {
720         final double tol = 1e-12;
721         final int maxSize = 6;
722         final KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
723         for (int m = 2; m < maxSize; m++) {
724             for (int n = 2; n < maxSize; n++) {
725                 double d = 0;
726                 for (int i = 0; i < 100; i++) {
727                     Assert.assertEquals(
728                             exactP341(d, m, n, true),
729                             ksTest.exactP(d, m, n, true),
730                             tol);
731                     d +=.01;
732                 }
733             }
734         }
735     }
736 
737     /**
738      * Checks exactP for all actually attained D values against the implementation
739      * in Commons Math 3.4.1. See {@link #exactP341(double, int, int, boolean)}
740      * which duplicates that code.
741      *
742      * The brute force implementation enumerates all n-m partitions and counts the
743      * number with p-values less than d, so it really is exact (but slow). This test
744      * compares current code with the 3.4.1 implementation. Set maxSize higher to
745      * extend the test to more values. Since the 3.4.1 code is very slow, setting
746      * maxSize higher than 8 will make this test case run a long time.
747      */
748     @Test
749     public void testExactP341RealD() {
750         final double tol = 1e-12;
751         final int maxSize = 6;
752         for (int m = 2; m < maxSize; m++) {
753             for (int n = 2; n < maxSize; n++ ) {
754                 // Not actually used for the test, but dValues basically stores
755                 // the ks distribution - keys are d values and values are p-values
756                 final Map<Double, Double> dValues = new TreeMap<>();
757                 final Iterator<int[]> combinationsIterator = CombinatoricsUtils.combinationsIterator(n + m, n);
758                 final double[] nSet = new double[n];
759                 final double[] mSet = new double[m];
760                 while (combinationsIterator.hasNext()) {
761                     // Generate an n-set
762                     final int[] nSetI = combinationsIterator.next();
763                     // Copy the n-set to nSet and its complement to mSet
764                     int j = 0;
765                     int k = 0;
766                     for (int i = 0; i < n + m; i++) {
767                         if (j < n && nSetI[j] == i) {
768                             nSet[j++] = i;
769                         } else {
770                             mSet[k++] = i;
771                         }
772                     }
773                     final KolmogorovSmirnovTest kStatTest = new KolmogorovSmirnovTest();
774                     final double curD = kStatTest.kolmogorovSmirnovStatistic(nSet, mSet);
775                     final double curP = kStatTest.exactP(curD, m, n, true);
776                     dValues.putIfAbsent(curD, curP);
777                     Assert.assertEquals(
778                             exactP341(curD, m, n, true),
779                             kStatTest.exactP(curD, m, n, true),
780                             tol);
781                 }
782             }
783         }
784         // Add code to display / persist dValues here if desired
785     }
786 
787     /**
788      * Reflection hack to expose private fixTies method for testing.
789      */
790     private static void fixTies(double[] x, double[] y) throws Exception {
791         Method method = KolmogorovSmirnovTest.class.getDeclaredMethod("fixTies",
792                                              double[].class, double[].class);
793         method.setAccessible(true);
794         KolmogorovSmirnovTest ksTest = new KolmogorovSmirnovTest();
795         method.invoke(ksTest, x, y);
796     }
797 
798     /**
799      * Reflection hack to expose private hasTies method.
800      */
801     private static boolean hasTies(double[] x, double[] y) throws Exception {
802         Method method = KolmogorovSmirnovTest.class.getDeclaredMethod("hasTies",
803                                                double[].class, double[].class);
804         method.setAccessible(true);
805         return (boolean) method.invoke(KolmogorovSmirnovTest.class, x, y);
806     }
807 
808 }