View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import org.hipparchus.exception.MathIllegalArgumentException;
25  import org.hipparchus.random.ISAACRandom;
26  import org.hipparchus.stat.LocalizedStatFormats;
27  import org.hipparchus.util.FastMath;
28  import org.junit.jupiter.api.Test;
29  
30  import java.util.Random;
31  
32  import static org.junit.jupiter.api.Assertions.assertEquals;
33  import static org.junit.jupiter.api.Assertions.assertFalse;
34  import static org.junit.jupiter.api.Assertions.assertNotNull;
35  import static org.junit.jupiter.api.Assertions.assertTrue;
36  import static org.junit.jupiter.api.Assertions.fail;
37  
38  
39  /**
40   * Test cases for the SimpleRegression class.
41   *
42   */
43  
44  final class SimpleRegressionTest {
45  
46      /*
47       * NIST "Norris" reference data set from
48       * http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
49       * Each row is ordered {y, x}; testNorris therefore passes row[1] as x and row[0] as y.
50       */
51      private double[][] data = { { 0.1, 0.2 }, {338.8, 337.4 }, {118.1, 118.2 },
52              {888.0, 884.6 }, {9.2, 10.1 }, {228.1, 226.5 }, {668.5, 666.3 }, {998.5, 996.3 },
53              {449.1, 448.6 }, {778.9, 777.0 }, {559.2, 558.2 }, {0.3, 0.4 }, {0.1, 0.6 }, {778.1, 775.5 },
54              {668.8, 666.9 }, {339.3, 338.0 }, {448.9, 447.5 }, {10.8, 11.6 }, {557.7, 556.0 },
55              {228.3, 228.1 }, {998.0, 995.8 }, {888.8, 887.6 }, {119.6, 120.2 }, {0.3, 0.3 },
56              {0.6, 0.3 }, {557.6, 556.8 }, {339.3, 339.1 }, {888.0, 887.2 }, {998.5, 999.0 },
57              {778.9, 779.0 }, {10.2, 11.1 }, {117.6, 118.3 }, {228.9, 229.2 }, {668.4, 669.1 },
58              {449.2, 448.9 }, {0.2, 0.5 }
59      };
60  
61      /*
62       * Correlation example (17 observations, used by testCorr and testClear) from
63       * http://www.xycoon.com/correlation.htm
64       */
65      private double[][] corrData = { { 101.0, 99.2 }, {100.1, 99.0 }, {100.0, 100.0 },
66              {90.6, 111.6 }, {86.5, 122.2 }, {89.7, 117.6 }, {90.6, 121.1 }, {82.8, 136.0 },
67              {70.1, 154.2 }, {65.4, 153.6 }, {61.3, 158.5 }, {62.5, 140.6 }, {63.6, 136.2 },
68              {52.6, 168.0 }, {59.7, 154.3 }, {59.5, 149.0 }, {61.3, 165.5 }
69      };
70  
71      /*
72       * From Moore and McCabe, "Introduction to the Practice of Statistics"
73       * Example 10.3 (used by testInference and as the base data set of the remove tests)
74       */
75      private double[][] infData = { { 15.6, 5.2 }, {26.8, 6.1 }, {37.8, 8.7 }, {36.4, 8.5 },
76              {35.5, 8.8 }, {18.6, 4.9 }, {15.3, 4.5 }, {7.9, 2.5 }, {0.0, 1.1 }
77      };
78  
79      /*
80       * Points to remove in the remove tests; every one of them is taken from infData
81       */
82      private double[][] removeSingle = {infData[1]};
83      private double[][] removeMultiple = { infData[1], infData[2] };
84      private double removeX = infData[0][0];
85      private double removeY = infData[0][1];
86  
87  
88      /*
89       * Data with bad linear fit (second data set exercised by testInference)
90       */
91      private double[][] infData2 = { { 1, 1 }, {2, 0 }, {3, 5 }, {4, 2 },
92              {5, -1 }, {6, 12 }
93      };
94  
95  
96      /*
97       * Data from NIST NOINT1; rows are ordered {y, x}, as consumed by testNoIntercept_noint1
98       */
99      private double[][] noint1 = {
100         {130.0,60.0},
101         {131.0,61.0},
102         {132.0,62.0},
103         {133.0,63.0},
104         {134.0,64.0},
105         {135.0,65.0},
106         {136.0,66.0},
107         {137.0,67.0},
108         {138.0,68.0},
109         {139.0,69.0},
110         {140.0,70.0}
111     };
112 
113     /*
114      * Data from NIST NOINT2
115      * Rows are ordered {y, x}, as consumed by testNoInterceot_noint2 and testRegressExceptions
116      */
117     private double[][] noint2 = {
118         {3.0,4},
119         {4,5},
120         {4,6}
121     };
122 
123 
124     /**
125      * Test that the SimpleRegression objects generated from combining two
126      * SimpleRegression objects created from subsets of data are identical to
127      * SimpleRegression objects created from the combined data.
128      */
129     @Test
130     void testAppend() {
131         check(false);
132         check(true);
133     }
134 
135     /**
136      * Checks that adding data to a single model gives the same result
137      * as adding "parts" of the dataset to smaller models and using append
138      * to aggregate the smaller models.
139      *
140      * @param includeIntercept
141      */
142     private void check(boolean includeIntercept) {
143         final int sets = 2;
144         final ISAACRandom rand = new ISAACRandom(10L);// Seed can be changed
145         final SimpleRegression whole = new SimpleRegression(includeIntercept);// regression of the whole set
146         final SimpleRegression parts = new SimpleRegression(includeIntercept);// regression with parts.
147 
148         for (int s = 0; s < sets; s++) {// loop through each subset of data.
149             final double coef = rand.nextDouble();
150             final SimpleRegression sub = new SimpleRegression(includeIntercept);// sub regression
151             for (int i = 0; i < 5; i++) { // loop through individual samlpes.
152                 final double x = rand.nextDouble();
153                 final double y = x * coef + rand.nextDouble();// some noise
154                 sub.addData(x, y);
155                 whole.addData(x, y);
156             }
157             parts.append(sub);
158             assertTrue(equals(parts, whole, 1E-6));
159         }
160     }
161 
162     /**
163      * Returns true iff the statistics reported by model1 are all within tol of
164      * those reported by model2.
165      *
166      * @param model1 first model
167      * @param model2 second model
168      * @param tol tolerance
169      * @return true if the two models report the same regression stats
170      */
171     private boolean equals(SimpleRegression model1, SimpleRegression model2, double tol) {
172         if (model1.getN() != model2.getN()) {
173             return false;
174         }
175         if (FastMath.abs(model1.getIntercept() - model2.getIntercept()) > tol) {
176             return false;
177         }
178         if (FastMath.abs(model1.getInterceptStdErr() - model2.getInterceptStdErr()) > tol) {
179             return false;
180         }
181         if (FastMath.abs(model1.getMeanSquareError() - model2.getMeanSquareError()) > tol) {
182             return false;
183         }
184         if (FastMath.abs(model1.getR() - model2.getR()) > tol) {
185             return false;
186         }
187         if (FastMath.abs(model1.getRegressionSumSquares() - model2.getRegressionSumSquares()) > tol) {
188             return false;
189         }
190         if (FastMath.abs(model1.getRSquare() - model2.getRSquare()) > tol) {
191             return false;
192         }
193         if (FastMath.abs(model1.getSignificance() - model2.getSignificance()) > tol) {
194             return false;
195         }
196         if (FastMath.abs(model1.getSlope() - model2.getSlope()) > tol) {
197             return false;
198         }
199         if (FastMath.abs(model1.getSlopeConfidenceInterval() - model2.getSlopeConfidenceInterval()) > tol) {
200             return false;
201         }
202         if (FastMath.abs(model1.getSlopeStdErr() - model2.getSlopeStdErr()) > tol) {
203             return false;
204         }
205         if (FastMath.abs(model1.getSumOfCrossProducts() - model2.getSumOfCrossProducts()) > tol) {
206             return false;
207         }
208         if (FastMath.abs(model1.getSumSquaredErrors() - model2.getSumSquaredErrors()) > tol) {
209             return false;
210         }
211         if (FastMath.abs(model1.getTotalSumSquares() - model2.getTotalSumSquares()) > tol) {
212             return false;
213         }
214         if (FastMath.abs(model1.getXSumSquares() - model2.getXSumSquares()) > tol) {
215             return false;
216         }
217         return true;
218     }
219 
220     @Test
221     void testRegressIfaceMethod(){
222         final SimpleRegression regression = new SimpleRegression(true);
223         final UpdatingMultipleLinearRegression iface = regression;
224         final SimpleRegression regressionNoint = new SimpleRegression( false );
225         final SimpleRegression regressionIntOnly= new SimpleRegression( false );
226         for (int i = 0; i < data.length; i++) {
227             iface.addObservation( new double[]{data[i][1]}, data[i][0]);
228             regressionNoint.addData(data[i][1], data[i][0]);
229             regressionIntOnly.addData(1.0, data[i][0]);
230         }
231 
232         //should not be null
233         final RegressionResults fullReg = iface.regress( );
234         assertNotNull(fullReg);
235         assertEquals(regression.getIntercept(), fullReg.getParameterEstimate(0), 1.0e-16, "intercept");
236         assertEquals(regression.getInterceptStdErr(), fullReg.getStdErrorOfEstimate(0),1.0E-16,"intercept std err");
237         assertEquals(regression.getSlope(), fullReg.getParameterEstimate(1), 1.0e-16, "slope");
238         assertEquals(regression.getSlopeStdErr(), fullReg.getStdErrorOfEstimate(1),1.0E-16,"slope std err");
239         assertEquals(regression.getN(), fullReg.getN(), "number of observations");
240         assertEquals(regression.getRSquare(), fullReg.getRSquared(), 1.0E-16, "r-square");
241         assertEquals(regression.getRegressionSumSquares(), fullReg.getRegressionSumSquares() ,1.0E-16,"SSR");
242         assertEquals(regression.getMeanSquareError(), fullReg.getMeanSquareError() ,1.0E-16,"MSE");
243         assertEquals(regression.getSumSquaredErrors(), fullReg.getErrorSumSquares() ,1.0E-16,"SSE");
244 
245 
246         final RegressionResults noInt   = iface.regress( new int[]{1} );
247         assertNotNull(noInt);
248         assertEquals(regressionNoint.getSlope(), noInt.getParameterEstimate(0), 1.0e-12, "slope");
249         assertEquals(regressionNoint.getSlopeStdErr(), noInt.getStdErrorOfEstimate(0),1.0E-16,"slope std err");
250         assertEquals(regressionNoint.getN(), noInt.getN(), "number of observations");
251         assertEquals(regressionNoint.getRSquare(), noInt.getRSquared(), 1.0E-16, "r-square");
252         assertEquals(regressionNoint.getRegressionSumSquares(), noInt.getRegressionSumSquares() ,1.0E-8,"SSR");
253         assertEquals(regressionNoint.getMeanSquareError(), noInt.getMeanSquareError() ,1.0E-16,"MSE");
254         assertEquals(regressionNoint.getSumSquaredErrors(), noInt.getErrorSumSquares() ,1.0E-16,"SSE");
255 
256         final RegressionResults onlyInt = iface.regress( new int[]{0} );
257         assertNotNull(onlyInt);
258         assertEquals(regressionIntOnly.getSlope(), onlyInt.getParameterEstimate(0), 1.0e-12, "slope");
259         assertEquals(regressionIntOnly.getSlopeStdErr(), onlyInt.getStdErrorOfEstimate(0),1.0E-12,"slope std err");
260         assertEquals(regressionIntOnly.getN(), onlyInt.getN(), "number of observations");
261         assertEquals(regressionIntOnly.getRSquare(), onlyInt.getRSquared(), 1.0E-14, "r-square");
262         assertEquals(regressionIntOnly.getSumSquaredErrors(), onlyInt.getErrorSumSquares() ,1.0E-8,"SSE");
263         assertEquals(regressionIntOnly.getRegressionSumSquares(), onlyInt.getRegressionSumSquares() ,1.0E-8,"SSR");
264         assertEquals(regressionIntOnly.getMeanSquareError(), onlyInt.getMeanSquareError() ,1.0E-8,"MSE");
265 
266     }
267 
268     /**
269      * Verify that regress generates exceptions as advertised for bad model specifications.
270      */
271     @Test
272     void testRegressExceptions() {
273         // No intercept
274         final SimpleRegression noIntRegression = new SimpleRegression(false);
275         noIntRegression.addData(noint2[0][1], noint2[0][0]);
276         noIntRegression.addData(noint2[1][1], noint2[1][0]);
277         noIntRegression.addData(noint2[2][1], noint2[2][0]);
278         try { // null array
279             noIntRegression.regress(null);
280             fail("Expecting MathIllegalArgumentException for null array");
281         } catch (MathIllegalArgumentException ex) {
282             // Expected
283         }
284         try { // empty array
285             noIntRegression.regress(new int[] {});
286             fail("Expecting MathIllegalArgumentException for empty array");
287         } catch (MathIllegalArgumentException ex) {
288             // Expected
289         }
290         try { // more than 1 regressor
291             noIntRegression.regress(new int[] {0, 1});
292             fail("Expecting MathIllegalArgumentException - too many regressors");
293         } catch (MathIllegalArgumentException ex) {
294             // Expected
295         }
296         try { // invalid regressor
297             noIntRegression.regress(new int[] {1});
298             fail("Expecting MathIllegalArgumentException - invalid regression");
299         } catch (MathIllegalArgumentException ex) {
300             // Expected
301         }
302 
303         // With intercept
304         final SimpleRegression regression = new SimpleRegression(true);
305         regression.addData(noint2[0][1], noint2[0][0]);
306         regression.addData(noint2[1][1], noint2[1][0]);
307         regression.addData(noint2[2][1], noint2[2][0]);
308         try { // null array
309             regression.regress(null);
310             fail("Expecting MathIllegalArgumentException for null array");
311         } catch (MathIllegalArgumentException ex) {
312             // Expected
313         }
314         try { // empty array
315             regression.regress(new int[] {});
316             fail("Expecting MathIllegalArgumentException for empty array");
317         } catch (MathIllegalArgumentException ex) {
318             // Expected
319         }
320         try { // more than 2 regressors
321             regression.regress(new int[] {0, 1, 2});
322             fail("Expecting MathIllegalArgumentException - too many regressors");
323         } catch (MathIllegalArgumentException ex) {
324             // Expected
325         }
326         try { // wrong order
327             regression.regress(new int[] {1,0});
328             fail("Expecting MathIllegalArgumentException - invalid regression");
329         } catch (MathIllegalArgumentException ex) {
330             // Expected
331         }
332         try { // out of range
333             regression.regress(new int[] {3,4});
334             fail("Expecting MathIllegalArgumentException");
335         } catch (MathIllegalArgumentException ex) {
336             // Expected
337         }
338         try { // out of range
339             regression.regress(new int[] {0,2});
340             fail("Expecting MathIllegalArgumentException");
341         } catch (MathIllegalArgumentException ex) {
342             // Expected
343         }
344         try { // out of range
345             regression.regress(new int[] {2});
346             fail("Expecting MathIllegalArgumentException");
347         } catch (MathIllegalArgumentException ex) {
348             // Expected
349         }
350     }
351 
352     @Test
353     void testNoInterceot_noint2(){
354          SimpleRegression regression = new SimpleRegression(false);
355          regression.addData(noint2[0][1], noint2[0][0]);
356          regression.addData(noint2[1][1], noint2[1][0]);
357          regression.addData(noint2[2][1], noint2[2][0]);
358          assertEquals(0, regression.getIntercept(), 0, "intercept");
359          assertEquals(0.727272727272727,
360                  regression.getSlope(), 10E-12, "slope");
361          assertEquals(0.420827318078432E-01,
362                 regression.getSlopeStdErr(),10E-12,"slope std err");
363         assertEquals(3, regression.getN(), "number of observations");
364         assertEquals(0.993348115299335,
365             regression.getRSquare(), 10E-12, "r-square");
366         assertEquals(40.7272727272727,
367             regression.getRegressionSumSquares(), 10E-9, "SSR");
368         assertEquals(0.136363636363636,
369             regression.getMeanSquareError(), 10E-10, "MSE");
370         assertEquals(0.272727272727273,
371             regression.getSumSquaredErrors(),10E-9,"SSE");
372     }
373 
374     @Test
375     void testNoIntercept_noint1(){
376         SimpleRegression regression = new SimpleRegression(false);
377         for (int i = 0; i < noint1.length; i++) {
378             regression.addData(noint1[i][1], noint1[i][0]);
379         }
380         assertEquals(0, regression.getIntercept(), 0, "intercept");
381         assertEquals(2.07438016528926, regression.getSlope(), 10E-12, "slope");
382         assertEquals(0.165289256198347E-01,
383                 regression.getSlopeStdErr(),10E-12,"slope std err");
384         assertEquals(11, regression.getN(), "number of observations");
385         assertEquals(0.999365492298663,
386             regression.getRSquare(), 10E-12, "r-square");
387         assertEquals(200457.727272727,
388             regression.getRegressionSumSquares(), 10E-9, "SSR");
389         assertEquals(12.7272727272727,
390             regression.getMeanSquareError(), 10E-10, "MSE");
391         assertEquals(127.272727272727,
392             regression.getSumSquaredErrors(),10E-9,"SSE");
393 
394     }
395 
396     @Test
397     void testNorris() {
398         SimpleRegression regression = new SimpleRegression();
399         for (int i = 0; i < data.length; i++) {
400             regression.addData(data[i][1], data[i][0]);
401         }
402         // Tests against certified values from
403         // http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Norris.dat
404         assertEquals(1.00211681802045, regression.getSlope(), 10E-12, "slope");
405         assertEquals(0.429796848199937E-03,
406                 regression.getSlopeStdErr(),10E-12,"slope std err");
407         assertEquals(36, regression.getN(), "number of observations");
408         assertEquals( -0.262323073774029,
409             regression.getIntercept(),10E-12,"intercept");
410         assertEquals(0.232818234301152,
411             regression.getInterceptStdErr(),10E-12,"std err intercept");
412         assertEquals(0.999993745883712,
413             regression.getRSquare(), 10E-12, "r-square");
414         assertEquals(4255954.13232369,
415             regression.getRegressionSumSquares(), 10E-9, "SSR");
416         assertEquals(0.782864662630069,
417             regression.getMeanSquareError(), 10E-10, "MSE");
418         assertEquals(26.6173985294224,
419             regression.getSumSquaredErrors(),10E-9,"SSE");
420         // ------------  End certified data tests
421 
422         assertEquals( -0.262323073774029,
423             regression.predict(0), 10E-12, "predict(0)");
424         assertEquals(1.00211681802045 - 0.262323073774029,
425             regression.predict(1), 10E-12, "predict(1)");
426     }
427 
428     @Test
429     void testCorr() {
430         SimpleRegression regression = new SimpleRegression();
431         regression.addData(corrData);
432         assertEquals(17, regression.getN(), "number of observations");
433         assertEquals(.896123, regression.getRSquare(), 10E-6, "r-square");
434         assertEquals(-0.94663767742, regression.getR(), 1E-10, "r");
435     }
436 
437     @Test
438     void testNaNs() {
439         SimpleRegression regression = new SimpleRegression();
440         assertTrue(Double.isNaN(regression.getIntercept()), "intercept not NaN");
441         assertTrue(Double.isNaN(regression.getSlope()), "slope not NaN");
442         assertTrue(Double.isNaN(regression.getSlopeStdErr()), "slope std err not NaN");
443         assertTrue(Double.isNaN(regression.getInterceptStdErr()), "intercept std err not NaN");
444         assertTrue(Double.isNaN(regression.getMeanSquareError()), "MSE not NaN");
445         assertTrue(Double.isNaN(regression.getR()), "e not NaN");
446         assertTrue(Double.isNaN(regression.getRSquare()), "r-square not NaN");
447         assertTrue( Double.isNaN(regression.getRegressionSumSquares()), "RSS not NaN");
448         assertTrue(Double.isNaN(regression.getSumSquaredErrors()),"SSE not NaN");
449         assertTrue(Double.isNaN(regression.getTotalSumSquares()), "SSTO not NaN");
450         assertTrue(Double.isNaN(regression.predict(0)), "predict not NaN");
451 
452         regression.addData(1, 2);
453         regression.addData(1, 3);
454 
455         // No x variation, so these should still blow...
456         assertTrue(Double.isNaN(regression.getIntercept()), "intercept not NaN");
457         assertTrue(Double.isNaN(regression.getSlope()), "slope not NaN");
458         assertTrue(Double.isNaN(regression.getSlopeStdErr()), "slope std err not NaN");
459         assertTrue(Double.isNaN(regression.getInterceptStdErr()), "intercept std err not NaN");
460         assertTrue(Double.isNaN(regression.getMeanSquareError()), "MSE not NaN");
461         assertTrue(Double.isNaN(regression.getR()), "e not NaN");
462         assertTrue(Double.isNaN(regression.getRSquare()), "r-square not NaN");
463         assertTrue(Double.isNaN(regression.getRegressionSumSquares()), "RSS not NaN");
464         assertTrue(Double.isNaN(regression.getSumSquaredErrors()), "SSE not NaN");
465         assertTrue(Double.isNaN(regression.predict(0)), "predict not NaN");
466 
467         // but SSTO should be OK
468         assertFalse(Double.isNaN(regression.getTotalSumSquares()), "SSTO NaN");
469 
470         regression = new SimpleRegression();
471 
472         regression.addData(1, 2);
473         regression.addData(3, 3);
474 
475         // All should be OK except MSE, s(b0), s(b1) which need one more df
476         assertFalse(Double.isNaN(regression.getIntercept()), "interceptNaN");
477         assertFalse(Double.isNaN(regression.getSlope()), "slope NaN");
478         assertTrue(Double.isNaN(regression.getSlopeStdErr()), "slope std err not NaN");
479         assertTrue(Double.isNaN(regression.getInterceptStdErr()), "intercept std err not NaN");
480         assertTrue(Double.isNaN(regression.getMeanSquareError()), "MSE not NaN");
481         assertFalse(Double.isNaN(regression.getR()), "r NaN");
482         assertFalse(Double.isNaN(regression.getRSquare()), "r-square NaN");
483         assertFalse(Double.isNaN(regression.getRegressionSumSquares()), "RSS NaN");
484         assertFalse(Double.isNaN(regression.getSumSquaredErrors()), "SSE NaN");
485         assertFalse(Double.isNaN(regression.getTotalSumSquares()), "SSTO NaN");
486         assertFalse(Double.isNaN(regression.predict(0)), "predict NaN");
487 
488         regression.addData(1, 4);
489 
490         // MSE, MSE, s(b0), s(b1) should all be OK now
491         assertFalse(Double.isNaN(regression.getMeanSquareError()), "MSE NaN");
492         assertFalse(Double.isNaN(regression.getSlopeStdErr()), "slope std err NaN");
493         assertFalse(Double.isNaN(regression.getInterceptStdErr()), "intercept std err NaN");
494     }
495 
496     @Test
497     void testClear() {
498         SimpleRegression regression = new SimpleRegression();
499         regression.addData(corrData);
500         assertEquals(17, regression.getN(), "number of observations");
501         regression.clear();
502         assertEquals(0, regression.getN(), "number of observations");
503         regression.addData(corrData);
504         assertEquals(.896123, regression.getRSquare(), 10E-6, "r-square");
505         regression.addData(data);
506         assertEquals(53, regression.getN(), "number of observations");
507     }
508 
509     @Test
510     void testInference() {
511         //----------  verified against R, version 1.8.1 -----
512         // infData
513         SimpleRegression regression = new SimpleRegression();
514         regression.addData(infData);
515         assertEquals(0.011448491,
516                 regression.getSlopeStdErr(), 1E-10, "slope std err");
517         assertEquals(0.286036932,
518                 regression.getInterceptStdErr(),1E-8,"std err intercept");
519         assertEquals(4.596e-07,
520                 regression.getSignificance(),1E-8,"significance");
521         assertEquals(0.0270713794287,
522                 regression.getSlopeConfidenceInterval(),1E-8,"slope conf interval half-width");
523         // infData2
524         regression = new SimpleRegression();
525         regression.addData(infData2);
526         assertEquals(1.07260253,
527                 regression.getSlopeStdErr(), 1E-8, "slope std err");
528         assertEquals(4.17718672,
529                 regression.getInterceptStdErr(),1E-8,"std err intercept");
530         assertEquals(0.261829133982,
531                 regression.getSignificance(),1E-11,"significance");
532         assertEquals(2.97802204827,
533                 regression.getSlopeConfidenceInterval(),1E-8,"slope conf interval half-width");
534         //------------- End R-verified tests -------------------------------
535 
536         //FIXME: get a real example to test against with alpha = .01
537         assertTrue(regression.getSlopeConfidenceInterval() < regression.getSlopeConfidenceInterval(0.01),
538                 "tighter means wider");
539 
540         try {
541             regression.getSlopeConfidenceInterval(1);
542             fail("expecting MathIllegalArgumentException for alpha = 1");
543         } catch (MathIllegalArgumentException ex) {
544             // ignored
545         }
546 
547     }
548 
549     @Test
550     void testPerfect() {
551         SimpleRegression regression = new SimpleRegression();
552         int n = 100;
553         for (int i = 0; i < n; i++) {
554             regression.addData(((double) i) / (n - 1), i);
555         }
556         assertEquals(0.0, regression.getSignificance(), 1.0e-5);
557         assertTrue(regression.getSlope() > 0.0);
558         assertTrue(regression.getSumSquaredErrors() >= 0.0);
559     }
560 
561     @Test
562     void testPerfect2() {
563         SimpleRegression regression = new SimpleRegression();
564         regression.addData(0, 0);
565         regression.addData(1, 1);
566         regression.addData(2, 2);
567         assertEquals(0.0, regression.getSlopeStdErr(), 0.0);
568         assertEquals(0.0, regression.getSignificance(), Double.MIN_VALUE);
569         assertEquals(1, regression.getRSquare(), Double.MIN_VALUE);
570     }
571 
572     @Test
573     void testPerfectNegative() {
574         SimpleRegression regression = new SimpleRegression();
575         int n = 100;
576         for (int i = 0; i < n; i++) {
577             regression.addData(- ((double) i) / (n - 1), i);
578         }
579 
580         assertEquals(0.0, regression.getSignificance(), 1.0e-5);
581         assertTrue(regression.getSlope() < 0.0);
582     }
583 
584     @Test
585     void testRandom() {
586         SimpleRegression regression = new SimpleRegression();
587         Random random = new Random(1);
588         int n = 100;
589         for (int i = 0; i < n; i++) {
590             regression.addData(((double) i) / (n - 1), random.nextDouble());
591         }
592 
593         assertTrue( 0.0 < regression.getSignificance()
594                     && regression.getSignificance() < 1.0);
595     }
596 
597 
598     // Jira MATH-85 = Bugzilla 39432
599     @Test
600     void testSSENonNegative() {
601         double[] y = { 8915.102, 8919.302, 8923.502 };
602         double[] x = { 1.107178495E2, 1.107264895E2, 1.107351295E2 };
603         SimpleRegression reg = new SimpleRegression();
604         for (int i = 0; i < x.length; i++) {
605             reg.addData(x[i], y[i]);
606         }
607         assertTrue(reg.getSumSquaredErrors() >= 0.0);
608     }
609 
610     // Test remove X,Y (single observation)
611     @Test
612     void testRemoveXY() {
613         // Create regression with inference data then remove to test
614         SimpleRegression regression = new SimpleRegression();
615         assertTrue(regression.hasIntercept());
616         regression.addData(infData);
617         regression.removeData(removeX, removeY);
618         regression.addData(removeX, removeY);
619         // Use the inference assertions to make sure that everything worked
620         assertEquals(0.011448491,
621                 regression.getSlopeStdErr(), 1E-10, "slope std err");
622         assertEquals(0.286036932,
623                 regression.getInterceptStdErr(),1E-8,"std err intercept");
624         assertEquals(4.596e-07,
625                 regression.getSignificance(),1E-8,"significance");
626         assertEquals(0.0270713794287,
627                 regression.getSlopeConfidenceInterval(),1E-8,"slope conf interval half-width");
628      }
629 
630     // Test remove single observation in array
631     @Test
632     void testRemoveSingle() {
633         // Create regression with inference data then remove to test
634         SimpleRegression regression = new SimpleRegression();
635         assertTrue(regression.hasIntercept());
636         regression.addData(infData);
637         regression.removeData(removeSingle);
638         regression.addData(removeSingle);
639         // Use the inference assertions to make sure that everything worked
640         assertEquals(0.011448491,
641                 regression.getSlopeStdErr(), 1E-10, "slope std err");
642         assertEquals(0.286036932,
643                 regression.getInterceptStdErr(),1E-8,"std err intercept");
644         assertEquals(4.596e-07,
645                 regression.getSignificance(),1E-8,"significance");
646         assertEquals(0.0270713794287,
647                 regression.getSlopeConfidenceInterval(),1E-8,"slope conf interval half-width");
648      }
649 
650     // Test remove multiple observations
651     @Test
652     void testRemoveMultiple() {
653         // Create regression with inference data then remove to test
654         SimpleRegression regression = new SimpleRegression();
655         assertTrue(regression.hasIntercept());
656         regression.addData(infData);
657         regression.removeData(removeMultiple);
658         regression.addData(removeMultiple);
659         // Use the inference assertions to make sure that everything worked
660         assertEquals(0.011448491,
661                 regression.getSlopeStdErr(), 1E-10, "slope std err");
662         assertEquals(0.286036932,
663                 regression.getInterceptStdErr(),1E-8,"std err intercept");
664         assertEquals(4.596e-07,
665                 regression.getSignificance(),1E-8,"significance");
666         assertEquals(0.0270713794287,
667                 regression.getSlopeConfidenceInterval(),1E-8,"slope conf interval half-width");
668      }
669 
670     // Test remove multiple observations
671     @Test
672     void testRemoveMultipleNoIntercept() {
673         // Create regression with inference data then remove to test
674         SimpleRegression regression = new SimpleRegression(false);
675         assertFalse(regression.hasIntercept());
676         assertEquals(0.0, regression.getIntercept(), 1.0e-15);
677         regression.addData(infData);
678         assertEquals(0.30593, regression.predict(1.25), 1.0e-5);
679         regression.removeData(removeMultiple);
680         regression.addData(removeMultiple);
681         // Use the inference assertions to make sure that everything worked
682         assertEquals(0.0103629732,
683                 regression.getSlopeStdErr(), 1E-10, "slope std err");
684         assertTrue(Double.isNaN(regression.getInterceptStdErr()), "std err intercept");
685         assertEquals(6.199e-08,
686                 regression.getSignificance(),1E-10,"significance");
687         assertEquals(0.02450454,
688                 regression.getSlopeConfidenceInterval(),1E-8,"slope conf interval half-width");
689      }
690 
691     // Remove observation when empty
692     @Test
693     void testRemoveObsFromEmpty() {
694         SimpleRegression regression = new SimpleRegression();
695         regression.removeData(removeX, removeY);
696         assertEquals(0, regression.getN());
697     }
698 
699     // Remove single observation to empty
700     @Test
701     void testRemoveObsFromSingle() {
702         SimpleRegression regression = new SimpleRegression();
703         regression.addData(removeX, removeY);
704         regression.removeData(removeX, removeY);
705         assertEquals(0, regression.getN());
706     }
707 
708     // Remove multiple observations to empty
709     @Test
710     void testRemoveMultipleToEmpty() {
711         SimpleRegression regression = new SimpleRegression();
712         regression.addData(removeMultiple);
713         regression.removeData(removeMultiple);
714         assertEquals(0, regression.getN());
715     }
716 
717     // Remove multiple observations past empty (i.e. size of array > n)
718     @Test
719     void testRemoveMultiplePastEmpty() {
720         SimpleRegression regression = new SimpleRegression();
721         regression.addData(removeX, removeY);
722         regression.removeData(removeMultiple);
723         assertEquals(0, regression.getN());
724     }
725 
726     @Test
727     void testWrongDimensions() {
728         try {
729             new SimpleRegression().addData(new double[1][1]);
730             fail("an exception should have been thrown");
731         } catch (MathIllegalArgumentException miae) {
732             assertEquals(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION, miae.getSpecifier());
733         }
734         try {
735             new SimpleRegression().addObservation(null, 0.0);
736             fail("an exception should have been thrown");
737         } catch (MathIllegalArgumentException miae) {
738             assertEquals(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION, miae.getSpecifier());
739         }
740         try {
741             new SimpleRegression().addObservation(new double[0], 0.0);
742             fail("an exception should have been thrown");
743         } catch (MathIllegalArgumentException miae) {
744             assertEquals(LocalizedStatFormats.INVALID_REGRESSION_OBSERVATION, miae.getSpecifier());
745         }
746         try {
747             new SimpleRegression().addObservations(new double[][] { null, null }, new double[2]);
748             fail("an exception should have been thrown");
749         } catch (MathIllegalArgumentException miae) {
750             assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, miae.getSpecifier());
751         }
752         try {
753             new SimpleRegression().addObservations(new double[][] { new double[0], new double[0] }, new double[2]);
754             fail("an exception should have been thrown");
755         } catch (MathIllegalArgumentException miae) {
756             assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, miae.getSpecifier());
757         }
758     }
759 
760     @Test
761     void testFewPoints() {
762         SimpleRegression sr = new SimpleRegression();
763         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
764         assertEquals(1, sr.getN());
765         assertTrue(Double.isNaN(sr.getXSumSquares()));
766         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
767         assertEquals(2, sr.getN());
768         assertFalse(Double.isNaN(sr.getXSumSquares()));
769         assertTrue(Double.isNaN(sr.getSlopeConfidenceInterval()));
770         assertTrue(Double.isNaN(sr.getSignificance()));
771         try {
772             sr.regress();
773             fail("an exception should have been thrown");
774         } catch (MathIllegalArgumentException miae) {
775             assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_REGRESSION, miae.getSpecifier());
776         }
777         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
778         RegressionResults results = sr.regress();
779         assertTrue(Double.isNaN(results.getParameterEstimate(1)));
780         results = sr.regress(new int[] { 1 });
781         assertEquals(1.0, results.getParameterEstimate(0), 1.0e-15);
782         sr.addObservations(new double[][] { new double[] { 2.0, 2.5 }}, new double[] { 2.0 });
783         results = sr.regress();
784         assertFalse(Double.isNaN(results.getParameterEstimate(1)));
785         sr.addObservations(new double[][] { new double[] { Double.NaN, Double.NaN }}, new double[] { Double.NaN });
786         results = sr.regress(new int[] { 1 });
787         assertTrue(Double.isNaN(results.getParameterEstimate(0)));
788 
789     }
790 
791     @Test
792     void testFewPointsWithoutIntercept() {
793         SimpleRegression sr = new SimpleRegression(false);
794         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
795         assertEquals(1, sr.getN());
796         assertTrue(Double.isNaN(sr.getXSumSquares()));
797         try {
798             sr.regress();
799             fail("an exception should have been thrown");
800         } catch (MathIllegalArgumentException miae) {
801             assertEquals(LocalizedStatFormats.NOT_ENOUGH_DATA_REGRESSION, miae.getSpecifier());
802         }
803         sr.addObservations(new double[][] { new double[] { 1.0, 1.5 }}, new double[] { 1.0 });
804         RegressionResults results = sr.regress();
805         assertFalse(Double.isNaN(results.getParameterEstimate(0)));
806         sr.addObservations(new double[][] { new double[] { Double.NaN, 1.0 }}, new double[] { 2.0 });
807         results = sr.regress();
808         assertTrue(Double.isNaN(results.getParameterEstimate(0)));
809     }
810 
811 }