1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.hipparchus.stat.fitting;
23
24 import org.hipparchus.UnitTestUtils;
25 import org.hipparchus.analysis.UnivariateFunction;
26 import org.hipparchus.analysis.integration.BaseAbstractUnivariateIntegrator;
27 import org.hipparchus.analysis.integration.IterativeLegendreGaussIntegrator;
28 import org.hipparchus.distribution.RealDistribution;
29 import org.hipparchus.distribution.continuous.ConstantRealDistribution;
30 import org.hipparchus.distribution.continuous.NormalDistribution;
31 import org.hipparchus.distribution.continuous.RealDistributionAbstractTest;
32 import org.hipparchus.distribution.continuous.UniformRealDistribution;
33 import org.hipparchus.exception.MathIllegalArgumentException;
34 import org.hipparchus.exception.MathIllegalStateException;
35 import org.hipparchus.exception.NullArgumentException;
36 import org.hipparchus.stat.descriptive.StreamingStatistics;
37 import org.hipparchus.util.FastMath;
38 import org.junit.jupiter.api.BeforeEach;
39 import org.junit.jupiter.api.Test;
40
41 import java.io.BufferedReader;
42 import java.io.File;
43 import java.io.IOException;
44 import java.io.InputStreamReader;
45 import java.net.URISyntaxException;
46 import java.net.URL;
47 import java.util.ArrayList;
48 import java.util.Arrays;
49
50 import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
51 import static org.junit.jupiter.api.Assertions.assertEquals;
52 import static org.junit.jupiter.api.Assertions.assertFalse;
53 import static org.junit.jupiter.api.Assertions.assertThrows;
54 import static org.junit.jupiter.api.Assertions.assertTrue;
55 import static org.junit.jupiter.api.Assertions.fail;
56
57
58
59
60 public final class EmpiricalDistributionTest extends RealDistributionAbstractTest {
61
62 protected EmpiricalDistribution empiricalDistribution = null;
63 protected EmpiricalDistribution empiricalDistribution2 = null;
64 protected File file = null;
65 protected URL url = null;
66 protected double[] dataArray = null;
67 protected final int n = 10000;
68
69 @Override
70 @BeforeEach
71 public void setUp() {
72 super.setUp();
73 empiricalDistribution = new EmpiricalDistribution(100);
74 url = getClass().getResource("testData.txt");
75 final ArrayList<Double> list = new ArrayList<>();
76 assertDoesNotThrow(() -> {
77 empiricalDistribution2 = new EmpiricalDistribution(100);
78 BufferedReader in =
79 new BufferedReader(new InputStreamReader(
80 url.openStream()));
81 String str = null;
82 while ((str = in.readLine()) != null) {
83 list.add(Double.valueOf(str));
84 }
85 in.close();
86 in = null;
87 }, "IOException ");
88
89 dataArray = new double[list.size()];
90 int i = 0;
91 for (Double data : list) {
92 dataArray[i] = data.doubleValue();
93 i++;
94 }
95 }
96
97
98 @Test
99 void testPrecondition1() {
100 assertThrows(MathIllegalArgumentException.class, () -> {
101 new EmpiricalDistribution(0);
102 });
103 }
104
105
106
107
108
109
110 @Test
111 void testLoad() throws Exception {
112
113 empiricalDistribution.load(url);
114 checkDistribution();
115
116
117 File file = new File(url.toURI());
118 empiricalDistribution.load(file);
119 checkDistribution();
120 }
121
122 private void checkDistribution() {
123
124
125 assertEquals(1000, empiricalDistribution.getSampleStats().getN(), 10E-7);
126
127 assertEquals(
128 5.069831575018909, empiricalDistribution.getSampleStats().getMean(), 10E-7);
129 assertEquals(
130 1.0173699343977738, empiricalDistribution.getSampleStats().getStandardDeviation(), 10E-7);
131 }
132
133 @Test
134 void testLoadURLError() throws IOException {
135 try {
136 URL existing = getClass().getResource("testData.txt");
137 URL nonexistent = new URL(existing.toString() + "-nonexistent");
138 empiricalDistribution.load(nonexistent);
139 fail("an exception should have been thrown");
140 } catch (IOException ioe) {
141
142 }
143 }
144
145 @Test
146 void testLoadFileError() throws IOException, URISyntaxException {
147 try {
148 File existing = new File(getClass().getResource("testData.txt").toURI());
149 File nonexistent = new File(existing.getAbsolutePath() + "-nonexistent");
150 empiricalDistribution.load(nonexistent);
151 fail("an exception should have been thrown");
152 } catch (IOException ioe) {
153
154 }
155 }
156
157
158
159
160
161
162
163 @Test
164 void testDoubleLoad() throws Exception {
165 empiricalDistribution2.load(dataArray);
166
167
168 assertEquals(1000, empiricalDistribution2.getSampleStats().getN(), 10E-7);
169
170 assertEquals(
171 5.069831575018909, empiricalDistribution2.getSampleStats().getMean(), 10E-7);
172 assertEquals(
173 1.0173699343977738, empiricalDistribution2.getSampleStats().getStandardDeviation(), 10E-7);
174
175 double[] bounds = empiricalDistribution2.getGeneratorUpperBounds();
176 assertEquals(100, bounds.length);
177 assertEquals(1.0, bounds[99], 10e-12);
178
179 }
180
181
182
183
184
185
186 @Test
187 void testNext() throws Exception {
188 tstGen(0.1);
189 tstDoubleGen(0.1);
190 }
191
192
193
194
195
196 @Test
197 void testNexFail() {
198 try {
199 empiricalDistribution.getNextValue();
200 empiricalDistribution2.getNextValue();
201 fail("Expecting MathIllegalStateException");
202 } catch (MathIllegalStateException ex) {
203
204 }
205 }
206
207
208
209
210 @Test
211 void testGridTooFine() throws Exception {
212 empiricalDistribution = new EmpiricalDistribution(1001);
213 tstGen(0.1);
214 empiricalDistribution2 = new EmpiricalDistribution(1001);
215 tstDoubleGen(0.1);
216 }
217
218
219
220
221 @Test
222 void testGridTooFat() throws Exception {
223 empiricalDistribution = new EmpiricalDistribution(1);
224 tstGen(5);
225
226 empiricalDistribution2 = new EmpiricalDistribution(1);
227 tstDoubleGen(5);
228 }
229
230
231
232
233 @Test
234 void testBinIndexOverflow() throws Exception {
235 double[] x = new double[] {9474.94326071674, 2080107.8865462579};
236 new EmpiricalDistribution().load(x);
237 }
238
239 @Test
240 void testSerialization() {
241
242 EmpiricalDistribution dist = new EmpiricalDistribution();
243 EmpiricalDistribution dist2 = (EmpiricalDistribution) UnitTestUtils.serializeAndRecover(dist);
244 verifySame(dist, dist2);
245
246
247 empiricalDistribution2.load(dataArray);
248 dist2 = (EmpiricalDistribution) UnitTestUtils.serializeAndRecover(empiricalDistribution2);
249 verifySame(empiricalDistribution2, dist2);
250 }
251
252 @Test
253 void testLoadNullDoubleArray() {
254 assertThrows(NullArgumentException.class, () -> {
255 new EmpiricalDistribution().load((double[]) null);
256 });
257 }
258
259 @Test
260 void testLoadNullURL() throws Exception {
261 assertThrows(NullArgumentException.class, () -> {
262 new EmpiricalDistribution().load((URL) null);
263 });
264 }
265
266 @Test
267 void testLoadNullFile() throws Exception {
268 assertThrows(NullArgumentException.class, () -> {
269 new EmpiricalDistribution().load((File) null);
270 });
271 }
272
273
274
275
276 @Test
277 void testGetBinUpperBounds() {
278 double[] testData = {0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10};
279 EmpiricalDistribution dist = new EmpiricalDistribution(5);
280 dist.load(testData);
281 double[] expectedBinUpperBounds = {2, 4, 6, 8, 10};
282 double[] expectedGeneratorUpperBounds = {4d/13d, 7d/13d, 9d/13d, 11d/13d, 1};
283 double tol = 10E-12;
284 UnitTestUtils.customAssertEquals(expectedBinUpperBounds, dist.getUpperBounds(), tol);
285 UnitTestUtils.customAssertEquals(expectedGeneratorUpperBounds, dist.getGeneratorUpperBounds(), tol);
286 }
287
288 @Test
289 void testReSeed() throws Exception {
290 empiricalDistribution.load(url);
291 empiricalDistribution.reSeed(100);
292 final double [] values = new double[10];
293 for (int i = 0; i < 10; i++) {
294 values[i] = empiricalDistribution.getNextValue();
295 }
296 empiricalDistribution.reSeed(100);
297 for (int i = 0; i < 10; i++) {
298 assertEquals(values[i],empiricalDistribution.getNextValue(), 0d);
299 }
300 }
301
302 private void verifySame(EmpiricalDistribution d1, EmpiricalDistribution d2) {
303 assertEquals(d1.isLoaded(), d2.isLoaded());
304 assertEquals(d1.getBinCount(), d2.getBinCount());
305 assertEquals(d1.getSampleStats(), d2.getSampleStats());
306 if (d1.isLoaded()) {
307 for (int i = 0; i < d1.getUpperBounds().length; i++) {
308 assertEquals(d1.getUpperBounds()[i], d2.getUpperBounds()[i], 0);
309 }
310 assertEquals(d1.getBinStats(), d2.getBinStats());
311 }
312 }
313
314 private void tstGen(double tolerance)throws Exception {
315 empiricalDistribution.load(url);
316 empiricalDistribution.reSeed(1000);
317 StreamingStatistics stats = new StreamingStatistics();
318 for (int i = 1; i < 1000; i++) {
319 stats.addValue(empiricalDistribution.getNextValue());
320 }
321 assertEquals(5.069831575018909, stats.getMean(),tolerance,"mean");
322 assertEquals(1.0173699343977738, stats.getStandardDeviation(),tolerance,"std dev");
323 }
324
325 private void tstDoubleGen(double tolerance)throws Exception {
326 empiricalDistribution2.load(dataArray);
327 empiricalDistribution2.reSeed(1000);
328 StreamingStatistics stats = new StreamingStatistics();
329 for (int i = 1; i < 1000; i++) {
330 stats.addValue(empiricalDistribution2.getNextValue());
331 }
332 assertEquals(5.069831575018909, stats.getMean(), tolerance, "mean");
333 assertEquals(1.0173699343977738, stats.getStandardDeviation(), tolerance, "std dev");
334 }
335
336
337
338 @Override
339 public RealDistribution makeDistribution() {
340
341 final double[] sourceData = new double[n + 1];
342 for (int i = 0; i < n + 1; i++) {
343 sourceData[i] = i;
344 }
345 EmpiricalDistribution dist = new EmpiricalDistribution();
346 dist.load(sourceData);
347 return dist;
348 }
349
350
351 private final double binMass = 10d / (n + 1);
352
353
354 private final double firstBinMass = 11d / (n + 1);
355
356 @Override
357 public double[] makeCumulativeTestPoints() {
358 final double[] testPoints = new double[] {9, 10, 15, 1000, 5004, 9999};
359 return testPoints;
360 }
361
362
363 @Override
364 public double[] makeCumulativeTestValues() {
365
366
367
368
369
370 final double[] testPoints = getCumulativeTestPoints();
371 final double[] cumValues = new double[testPoints.length];
372 final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
373 final double[] binBounds = empiricalDistribution.getUpperBounds();
374 for (int i = 0; i < testPoints.length; i++) {
375 final int bin = findBin(testPoints[i]);
376 final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
377 binBounds[bin - 1];
378 final double upper = binBounds[bin];
379
380
381 final double bMinus = bin == 0 ? 0 : (bin - 1) * binMass + firstBinMass;
382 final RealDistribution kernel = findKernel(lower, upper);
383 final double withinBinKernelMass = kernel.probability(lower, upper);
384 final double kernelCum = kernel.probability(lower, testPoints[i]);
385 cumValues[i] = bMinus + (bin == 0 ? firstBinMass : binMass) * kernelCum/withinBinKernelMass;
386 }
387 return cumValues;
388 }
389
390 @Override
391 public double[] makeDensityTestValues() {
392 final double[] testPoints = getCumulativeTestPoints();
393 final double[] densityValues = new double[testPoints.length];
394 final EmpiricalDistribution empiricalDistribution = (EmpiricalDistribution) makeDistribution();
395 final double[] binBounds = empiricalDistribution.getUpperBounds();
396 for (int i = 0; i < testPoints.length; i++) {
397 final int bin = findBin(testPoints[i]);
398 final double lower = bin == 0 ? empiricalDistribution.getSupportLowerBound() :
399 binBounds[bin - 1];
400 final double upper = binBounds[bin];
401 final RealDistribution kernel = findKernel(lower, upper);
402 final double withinBinKernelMass = kernel.probability(lower, upper);
403 final double density = kernel.density(testPoints[i]);
404 densityValues[i] = density * (bin == 0 ? firstBinMass : binMass) / withinBinKernelMass;
405 }
406 return densityValues;
407 }
408
409
410
411
412
413
414
415 @Override
416 @Test
417 public void testDensityIntegrals() {
418 final RealDistribution distribution = makeDistribution();
419 final double tol = 1.0e-9;
420 final BaseAbstractUnivariateIntegrator integrator =
421 new IterativeLegendreGaussIntegrator(5, 1.0e-12, 1.0e-10);
422 final UnivariateFunction d = new UnivariateFunction() {
423 @Override
424 public double value(double x) {
425 return distribution.density(x);
426 }
427 };
428 final double[] lower = {0, 5, 1000, 5001, 9995};
429 final double[] upper = {5, 12, 1030, 5010, 10000};
430 for (int i = 1; i < 5; i++) {
431 assertEquals(
432 distribution.probability(
433 lower[i], upper[i]),
434 integrator.integrate(
435 1000000,
436 d, lower[i], upper[i]), tol);
437 }
438 }
439
440
441
442
443
444 @Test
445 void testSampleValuesRange() {
446
447
448 final double[] data = new double[100];
449 for (int i = 0; i < 50; i++) {
450 data[i] = 1 / ((double) i + 1);
451 }
452 for (int i = 51; i < 100; i++) {
453 data[i] = 1 - 1 / (100 - (double) i + 2);
454 }
455 EmpiricalDistribution dist = new EmpiricalDistribution(10);
456 dist.load(data);
457 dist.reseedRandomGenerator(1000);
458 for (int i = 0; i < 1000; i++) {
459 final double dev = dist.getNextValue();
460 assertTrue(dev < 1);
461 assertTrue(dev > 0);
462 }
463 }
464
465
466
467
468 @Test
469 void testNoBinVariance() {
470 final double[] data = {0, 0, 1, 1};
471 EmpiricalDistribution dist = new EmpiricalDistribution(2);
472 dist.load(data);
473 dist.reseedRandomGenerator(1000);
474 for (int i = 0; i < 1000; i++) {
475 final double dev = dist.getNextValue();
476 assertTrue(dev == 0 || dev == 1);
477 }
478 assertEquals(0.5, dist.cumulativeProbability(0), Double.MIN_VALUE);
479 assertEquals(1.0, dist.cumulativeProbability(1), Double.MIN_VALUE);
480 assertEquals(0.5, dist.cumulativeProbability(0.5), Double.MIN_VALUE);
481 assertEquals(0.5, dist.cumulativeProbability(0.7), Double.MIN_VALUE);
482 }
483
484
485
486
487 private int findBin(double x) {
488
489 final double nMinus = FastMath.floor(x / 10);
490 final int bin = (int) FastMath.round(nMinus);
491
492 return FastMath.floor(x / 10) == x / 10 ? bin - 1 : bin;
493 }
494
495
496
497
498
499
500
501
502 private RealDistribution findKernel(double lower, double upper) {
503 if (lower < 1) {
504 return new NormalDistribution(5d, 3.3166247903554);
505 } else {
506 return new NormalDistribution((upper + lower + 1) / 2d, 3.0276503540974917);
507 }
508 }
509
510 @Test
511 void testKernelOverrideConstant() {
512 final EmpiricalDistribution dist = new ConstantKernelEmpiricalDistribution(5);
513 final double[] data = {1d,2d,3d, 4d,5d,6d, 7d,8d,9d, 10d,11d,12d, 13d,14d,15d};
514 dist.load(data);
515
516 double[] values = {2d, 5d, 8d, 11d, 14d};
517 for (int i = 0; i < 20; i++) {
518 assertTrue(Arrays.binarySearch(values, dist.getNextValue()) >= 0);
519 }
520 final double tol = 10E-12;
521 assertEquals(0.0, dist.cumulativeProbability(1), tol);
522 assertEquals(0.2, dist.cumulativeProbability(2), tol);
523 assertEquals(0.6, dist.cumulativeProbability(10), tol);
524 assertEquals(0.8, dist.cumulativeProbability(12), tol);
525 assertEquals(0.8, dist.cumulativeProbability(13), tol);
526 assertEquals(1.0, dist.cumulativeProbability(15), tol);
527
528 assertEquals(2.0, dist.inverseCumulativeProbability(0.1), tol);
529 assertEquals(2.0, dist.inverseCumulativeProbability(0.2), tol);
530 assertEquals(5.0, dist.inverseCumulativeProbability(0.3), tol);
531 assertEquals(5.0, dist.inverseCumulativeProbability(0.4), tol);
532 assertEquals(8.0, dist.inverseCumulativeProbability(0.5), tol);
533 assertEquals(8.0, dist.inverseCumulativeProbability(0.6), tol);
534 }
535
536 @Test
537 void testKernelOverrideUniform() {
538 final EmpiricalDistribution dist = new UniformKernelEmpiricalDistribution(5);
539 final double[] data = {1d,2d,3d, 4d,5d,6d, 7d,8d,9d, 10d,11d,12d, 13d,14d,15d};
540 dist.load(data);
541
542 final double[] bounds = {3d, 6d, 9d, 12d};
543 final double tol = 10E-12;
544 for (int i = 0; i < 20; i++) {
545 final double v = dist.getNextValue();
546
547 for (int j = 0; j < bounds.length; j++) {
548 assertFalse(v > bounds[j] + tol && v < bounds[j] + 1 - tol);
549 }
550 }
551 assertEquals(0.0, dist.cumulativeProbability(1), tol);
552 assertEquals(0.1, dist.cumulativeProbability(2), tol);
553 assertEquals(0.6, dist.cumulativeProbability(10), tol);
554 assertEquals(0.8, dist.cumulativeProbability(12), tol);
555 assertEquals(0.8, dist.cumulativeProbability(13), tol);
556 assertEquals(1.0, dist.cumulativeProbability(15), tol);
557
558 assertEquals(2.0, dist.inverseCumulativeProbability(0.1), tol);
559 assertEquals(3.0, dist.inverseCumulativeProbability(0.2), tol);
560 assertEquals(5.0, dist.inverseCumulativeProbability(0.3), tol);
561 assertEquals(6.0, dist.inverseCumulativeProbability(0.4), tol);
562 assertEquals(8.0, dist.inverseCumulativeProbability(0.5), tol);
563 assertEquals(9.0, dist.inverseCumulativeProbability(0.6), tol);
564 }
565
566 @Test
567 void testEmptyBins() {
568 double[] data = new double[10];
569 for (int i = 0; i < 10; ++i) {
570 data[i] = i < 5 ? 0 : 1;
571 }
572 EmpiricalDistribution edist = new EmpiricalDistribution(100);
573 edist.load(data);
574 assertEquals(0.5, edist.cumulativeProbability(0), Double.MIN_VALUE);
575 assertEquals(0.5, edist.cumulativeProbability(0.3), Double.MIN_VALUE);
576 assertEquals(0.5, edist.cumulativeProbability(0.9), Double.MIN_VALUE);
577 assertEquals(1, edist.cumulativeProbability(1), Double.MIN_VALUE);
578 assertEquals(1, edist.cumulativeProbability(1.5), Double.MIN_VALUE);
579 }
580
581
582
583
584
585 private class ConstantKernelEmpiricalDistribution extends EmpiricalDistribution {
586 private static final long serialVersionUID = 1L;
587 public ConstantKernelEmpiricalDistribution(int i) {
588 super(i);
589 }
590
591 @Override
592 protected RealDistribution getKernel(StreamingStatistics bStats) {
593 return new ConstantRealDistribution(bStats.getMean());
594 }
595 }
596
597
598
599
600 private class UniformKernelEmpiricalDistribution extends EmpiricalDistribution {
601 private static final long serialVersionUID = 2963149194515159653L;
602 public UniformKernelEmpiricalDistribution(int i) {
603 super(i);
604 }
605 @Override
606 protected RealDistribution getKernel(StreamingStatistics bStats) {
607 return new UniformRealDistribution(bStats.getMin(), bStats.getMax());
608 }
609 }
610 }