View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import org.hipparchus.UnitTestUtils;
25  import org.hipparchus.exception.MathIllegalArgumentException;
26  import org.hipparchus.exception.NullArgumentException;
27  import org.hipparchus.linear.Array2DRowRealMatrix;
28  import org.hipparchus.linear.DefaultRealMatrixChangingVisitor;
29  import org.hipparchus.linear.MatrixUtils;
30  import org.hipparchus.linear.RealMatrix;
31  import org.hipparchus.linear.RealVector;
32  import org.hipparchus.stat.StatUtils;
33  import org.junit.jupiter.api.BeforeEach;
34  import org.junit.jupiter.api.Test;
35  
36  import static org.junit.jupiter.api.Assertions.assertEquals;
37  import static org.junit.jupiter.api.Assertions.assertThrows;
38  import static org.junit.jupiter.api.Assertions.assertTrue;
39  
40  class OLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
41  
42      private double[] y;
43      private double[][] x;
44  
45      @BeforeEach
46      @Override
47      public void setUp(){
48          y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
49          x = new double[6][];
50          x[0] = new double[]{0, 0, 0, 0, 0};
51          x[1] = new double[]{2.0, 0, 0, 0, 0};
52          x[2] = new double[]{0, 3.0, 0, 0, 0};
53          x[3] = new double[]{0, 0, 4.0, 0, 0};
54          x[4] = new double[]{0, 0, 0, 5.0, 0};
55          x[5] = new double[]{0, 0, 0, 0, 6.0};
56          super.setUp();
57      }
58  
59      @Override
60      protected OLSMultipleLinearRegression createRegression() {
61          OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
62          regression.newSampleData(y, x);
63          return regression;
64      }
65  
66      @Override
67      protected int getNumberOfRegressors() {
68          return x[0].length + 1;
69      }
70  
71      @Override
72      protected int getSampleSize() {
73          return y.length;
74      }
75  
76      @Test
77      void cannotAddSampleDataWithSizeMismatch() {
78          assertThrows(MathIllegalArgumentException.class, () -> {
79              double[] y = new double[]{1.0, 2.0};
80              double[][] x = new double[1][];
81              x[0] = new double[]{1.0, 0};
82              createRegression().newSampleData(y, x);
83          });
84      }
85  
86      @Test
87      void testPerfectFit() {
88          double[] betaHat = regression.estimateRegressionParameters();
89          UnitTestUtils.customAssertEquals(betaHat,
90                                           new double[]{ 11.0, 1.0 / 2.0, 2.0 / 3.0, 3.0 / 4.0, 4.0 / 5.0, 5.0 / 6.0 },
91                                           1e-14);
92          double[] residuals = regression.estimateResiduals();
93          UnitTestUtils.customAssertEquals(residuals, new double[]{ 0d, 0d, 0d, 0d, 0d, 0d},
94                                           1e-14);
95          RealMatrix errors =
96              new Array2DRowRealMatrix(regression.estimateRegressionParametersVariance(), false);
97          final double[] s = { 1.0, -1.0 /  2.0, -1.0 /  3.0, -1.0 /  4.0, -1.0 /  5.0, -1.0 /  6.0 };
98          RealMatrix referenceVariance = new Array2DRowRealMatrix(s.length, s.length);
99          referenceVariance.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
100             @Override
101             public double visit(int row, int column, double value) {
102                 if (row == 0) {
103                     return s[column];
104                 }
105                 double x = s[row] * s[column];
106                 return (row == column) ? 2 * x : x;
107             }
108         });
109        assertEquals(0.0,
110                      errors.subtract(referenceVariance).getNorm1(),
111                      5.0e-16 * referenceVariance.getNorm1());
112        assertEquals(1, ((OLSMultipleLinearRegression) regression).calculateRSquared(), 1E-12);
113     }
114 
115 
116     /**
117      * Test Longley dataset against certified values provided by NIST.
118      * Data Source: J. Longley (1967) "An Appraisal of Least Squares
119      * Programs for the Electronic Computer from the Point of View of the User"
120      * Journal of the American Statistical Association, vol. 62. September,
121      * pp. 819-841.
122      *
123      * Certified values (and data) are from NIST:
124      * <a href="https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat">Longley dataset</a>
125      */
126     @Test
127     void testLongly() {
128         // Y values are first, then independent vars
129         // Each row is one observation
130         double[] design = new double[] {
131             60323,83.0,234289,2356,1590,107608,1947,
132             61122,88.5,259426,2325,1456,108632,1948,
133             60171,88.2,258054,3682,1616,109773,1949,
134             61187,89.5,284599,3351,1650,110929,1950,
135             63221,96.2,328975,2099,3099,112075,1951,
136             63639,98.1,346999,1932,3594,113270,1952,
137             64989,99.0,365385,1870,3547,115094,1953,
138             63761,100.0,363112,3578,3350,116219,1954,
139             66019,101.2,397469,2904,3048,117388,1955,
140             67857,104.6,419180,2822,2857,118734,1956,
141             68169,108.4,442769,2936,2798,120445,1957,
142             66513,110.8,444546,4681,2637,121950,1958,
143             68655,112.6,482704,3813,2552,123366,1959,
144             69564,114.2,502601,3931,2514,125368,1960,
145             69331,115.7,518173,4806,2572,127852,1961,
146             70551,116.9,554894,4007,2827,130081,1962
147         };
148 
149         final int nobs = 16;
150         final int nvars = 6;
151 
152         // Estimate the model
153         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
154         model.newSampleData(design, nobs, nvars);
155 
156         // Check expected beta values from NIST
157         double[] betaHat = model.estimateRegressionParameters();
158         UnitTestUtils.customAssertEquals(betaHat,
159                                          new double[]{-3482258.63459582, 15.0618722713733,
160                 -0.358191792925910E-01,-2.02022980381683,
161                 -1.03322686717359,-0.511041056535807E-01,
162                  1829.15146461355}, 2E-8); //
163 
164         // Check expected residuals from R
165         double[] residuals = model.estimateResiduals();
166         UnitTestUtils.customAssertEquals(residuals, new double[]{
167                 267.340029759711,-94.0139423988359,46.28716775752924,
168                 -410.114621930906,309.7145907602313,-249.3112153297231,
169                 -164.0489563956039,-13.18035686637081,14.30477260005235,
170                  455.394094551857,-17.26892711483297,-39.0550425226967,
171                 -155.5499735953195,-85.6713080421283,341.9315139607727,
172                 -206.7578251937366},
173                                          1E-8);
174 
175         // Check standard errors from NIST
176         double[] errors = model.estimateRegressionParametersStandardErrors();
177         UnitTestUtils.customAssertEquals(new double[] { 890420.383607373,
178                                                         84.9149257747669,
179                                                         0.334910077722432E-01,
180                                                         0.488399681651699,
181                                                         0.214274163161675,
182                                                         0.226073200069370,
183                                                         455.478499142212}, errors, 1E-6);
184 
185         // Check regression standard error against R
186         assertEquals(304.8540735619638, model.estimateRegressionStandardError(), 1E-10);
187 
188         // Check R-Square statistics against R
189         assertEquals(0.995479004577296, model.calculateRSquared(), 1E-12);
190         assertEquals(0.992465007628826, model.calculateAdjustedRSquared(), 1E-12);
191 
192         checkVarianceConsistency(model);
193 
194         // Estimate model without intercept
195         model.setNoIntercept(true);
196         model.newSampleData(design, nobs, nvars);
197 
198         // Check expected beta values from R
199         betaHat = model.estimateRegressionParameters();
200         UnitTestUtils.customAssertEquals(betaHat,
201                                          new double[]{-52.99357013868291, 0.07107319907358,
202                 -0.42346585566399,-0.57256866841929,
203                 -0.41420358884978, 48.41786562001326}, 1E-11);
204 
205         // Check standard errors from R
206         errors = model.estimateRegressionParametersStandardErrors();
207         UnitTestUtils.customAssertEquals(new double[] { 129.54486693117232, 0.03016640003786,
208                                                         0.41773654056612, 0.27899087467676, 0.32128496193363,
209                                                         17.68948737819961}, errors, 1E-11);
210 
211         // Check expected residuals from R
212         residuals = model.estimateResiduals();
213         UnitTestUtils.customAssertEquals(residuals, new double[]{
214                 279.90274927293092, -130.32465380836874, 90.73228661967445, -401.31252201634948,
215                 -440.46768772620027, -543.54512853774793, 201.32111639536299, 215.90889365977932,
216                 73.09368242049943, 913.21694494481869, 424.82484953610174, -8.56475876776709,
217                 -361.32974610842876, 27.34560497213464, 151.28955976355002, -492.49937355336846},
218                                          1E-10);
219 
220         // Check regression standard error against R
221         assertEquals(475.1655079819517, model.estimateRegressionStandardError(), 1E-10);
222 
223         // Check R-Square statistics against R
224         assertEquals(0.9999670130706, model.calculateRSquared(), 1E-12);
225         assertEquals(0.999947220913, model.calculateAdjustedRSquared(), 1E-12);
226 
227     }
228 
229     /**
230      * Test R Swiss fertility dataset against R.
231      * Data Source: R datasets package
232      */
233     @Test
234     void testSwissFertility() {
235         double[] design = new double[] {
236             80.2,17.0,15,12,9.96,
237             83.1,45.1,6,9,84.84,
238             92.5,39.7,5,5,93.40,
239             85.8,36.5,12,7,33.77,
240             76.9,43.5,17,15,5.16,
241             76.1,35.3,9,7,90.57,
242             83.8,70.2,16,7,92.85,
243             92.4,67.8,14,8,97.16,
244             82.4,53.3,12,7,97.67,
245             82.9,45.2,16,13,91.38,
246             87.1,64.5,14,6,98.61,
247             64.1,62.0,21,12,8.52,
248             66.9,67.5,14,7,2.27,
249             68.9,60.7,19,12,4.43,
250             61.7,69.3,22,5,2.82,
251             68.3,72.6,18,2,24.20,
252             71.7,34.0,17,8,3.30,
253             55.7,19.4,26,28,12.11,
254             54.3,15.2,31,20,2.15,
255             65.1,73.0,19,9,2.84,
256             65.5,59.8,22,10,5.23,
257             65.0,55.1,14,3,4.52,
258             56.6,50.9,22,12,15.14,
259             57.4,54.1,20,6,4.20,
260             72.5,71.2,12,1,2.40,
261             74.2,58.1,14,8,5.23,
262             72.0,63.5,6,3,2.56,
263             60.5,60.8,16,10,7.72,
264             58.3,26.8,25,19,18.46,
265             65.4,49.5,15,8,6.10,
266             75.5,85.9,3,2,99.71,
267             69.3,84.9,7,6,99.68,
268             77.3,89.7,5,2,100.00,
269             70.5,78.2,12,6,98.96,
270             79.4,64.9,7,3,98.22,
271             65.0,75.9,9,9,99.06,
272             92.2,84.6,3,3,99.46,
273             79.3,63.1,13,13,96.83,
274             70.4,38.4,26,12,5.62,
275             65.7,7.7,29,11,13.79,
276             72.7,16.7,22,13,11.22,
277             64.4,17.6,35,32,16.92,
278             77.6,37.6,15,7,4.97,
279             67.6,18.7,25,7,8.65,
280             35.0,1.2,37,53,42.34,
281             44.7,46.6,16,29,50.43,
282             42.8,27.7,22,29,58.33
283         };
284 
285         final int nobs = 47;
286         final int nvars = 4;
287 
288         // Estimate the model
289         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
290         model.newSampleData(design, nobs, nvars);
291 
292         // Check expected beta values from R
293         double[] betaHat = model.estimateRegressionParameters();
294         UnitTestUtils.customAssertEquals(betaHat,
295                                          new double[]{91.05542390271397,
296                 -0.22064551045715,
297                 -0.26058239824328,
298                 -0.96161238456030,
299                  0.12441843147162}, 1E-12);
300 
301         // Check expected residuals from R
302         double[] residuals = model.estimateResiduals();
303         UnitTestUtils.customAssertEquals(residuals, new double[]{
304                 7.1044267859730512,1.6580347433531366,
305                 4.6944952770029644,8.4548022690166160,13.6547432343186212,
306                -9.3586864458500774,7.5822446330520386,15.5568995563859289,
307                 0.8113090736598980,7.1186762732484308,7.4251378771228724,
308                 2.6761316873234109,0.8351584810309354,7.1769991119615177,
309                -3.8746753206299553,-3.1337779476387251,-0.1412575244091504,
310                 1.1186809170469780,-6.3588097346816594,3.4039270429434074,
311                 2.3374058329820175,-7.9272368576900503,-7.8361010968497959,
312                -11.2597369269357070,0.9445333697827101,6.6544245101380328,
313                -0.9146136301118665,-4.3152449403848570,-4.3536932047009183,
314                -3.8907885169304661,-6.3027643926302188,-7.8308982189289091,
315                -3.1792280015332750,-6.7167298771158226,-4.8469946718041754,
316                -10.6335664353633685,11.1031134362036958,6.0084032641811733,
317                 5.4326230830188482,-7.2375578629692230,2.1671550814448222,
318                 15.0147574652763112,4.8625103516321015,-7.1597256413907706,
319                 -0.4515205619767598,-10.2916870903837587,-15.7812984571900063},
320                                          1E-12);
321 
322         // Check standard errors from R
323         double[] errors = model.estimateRegressionParametersStandardErrors();
324         UnitTestUtils.customAssertEquals(new double[] { 6.94881329475087,
325                                                         0.07360008972340,
326                                                         0.27410957467466,
327                                                         0.19454551679325,
328                                                         0.03726654773803}, errors, 1E-10);
329 
330         // Check regression standard error against R
331         assertEquals(7.73642194433223, model.estimateRegressionStandardError(), 1E-12);
332 
333         // Check R-Square statistics against R
334         assertEquals(0.649789742860228, model.calculateRSquared(), 1E-12);
335         assertEquals(0.6164363850373927, model.calculateAdjustedRSquared(), 1E-12);
336 
337         checkVarianceConsistency(model);
338 
339         // Estimate the model with no intercept
340         model = new OLSMultipleLinearRegression();
341         model.setNoIntercept(true);
342         model.newSampleData(design, nobs, nvars);
343 
344         // Check expected beta values from R
345         betaHat = model.estimateRegressionParameters();
346         UnitTestUtils.customAssertEquals(betaHat,
347                                          new double[]{0.52191832900513,
348                   2.36588087917963,
349                   -0.94770353802795,
350                   0.30851985863609}, 1E-12);
351 
352         // Check expected residuals from R
353         residuals = model.estimateResiduals();
354         UnitTestUtils.customAssertEquals(residuals, new double[]{
355                 44.138759883538249, 27.720705122356215, 35.873200836126799,
356                 34.574619581211977, 26.600168342080213, 15.074636243026923, -12.704904871199814,
357                 1.497443824078134, 2.691972687079431, 5.582798774291231, -4.422986561283165,
358                 -9.198581600334345, 4.481765170730647, 2.273520207553216, -22.649827853221336,
359                 -17.747900013943308, 20.298314638496436, 6.861405135329779, -8.684712790954924,
360                 -10.298639278062371, -9.896618896845819, 4.568568616351242, -15.313570491727944,
361                 -13.762961360873966, 7.156100301980509, 16.722282219843990, 26.716200609071898,
362                 -1.991466398777079, -2.523342564719335, 9.776486693095093, -5.297535127628603,
363                 -16.639070567471094, -10.302057295211819, -23.549487860816846, 1.506624392156384,
364                 -17.939174438345930, 13.105792202765040, -1.943329906928462, -1.516005841666695,
365                 -0.759066561832886, 20.793137744128977, -2.485236153005426, 27.588238710486976,
366                 2.658333257106881, -15.998337823623046, -5.550742066720694, -14.219077806826615},
367                                          1E-12);
368 
369         // Check standard errors from R
370         errors = model.estimateRegressionParametersStandardErrors();
371         UnitTestUtils.customAssertEquals(new double[] { 0.10470063765677, 0.41684100584290,
372                                                         0.43370143099691, 0.07694953606522}, errors, 1E-10);
373 
374         // Check regression standard error against R
375         assertEquals(17.24710630547, model.estimateRegressionStandardError(), 1E-10);
376 
377         // Check R-Square statistics against R
378         assertEquals(0.946350722085, model.calculateRSquared(), 1E-12);
379         assertEquals(0.9413600915813, model.calculateAdjustedRSquared(), 1E-12);
380     }
381 
382     /**
383      * Test hat matrix computation
384      *
385      */
386     @Test
387     void testHat() {
388 
389         /*
390          * This example is from "The Hat Matrix in Regression and ANOVA",
391          * David C. Hoaglin and Roy E. Welsch,
392          * The American Statistician, Vol. 32, No. 1 (Feb., 1978), pp. 17-22.
393          *
394          */
395         double[] design = new double[] {
396                 11.14, .499, 11.1,
397                 12.74, .558, 8.9,
398                 13.13, .604, 8.8,
399                 11.51, .441, 8.9,
400                 12.38, .550, 8.8,
401                 12.60, .528, 9.9,
402                 11.13, .418, 10.7,
403                 11.7, .480, 10.5,
404                 11.02, .406, 10.5,
405                 11.41, .467, 10.7
406         };
407 
408         int nobs = 10;
409         int nvars = 2;
410 
411         // Estimate the model
412         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
413         model.newSampleData(design, nobs, nvars);
414 
415         RealMatrix hat = model.calculateHat();
416 
417         // Reference data is upper half of symmetric hat matrix
418         double[] referenceData = new double[] {
419                 .418, -.002,  .079, -.274, -.046,  .181,  .128,  .222,  .050,  .242,
420                        .242,  .292,  .136,  .243,  .128, -.041,  .033, -.035,  .004,
421                               .417, -.019,  .273,  .187, -.126,  .044, -.153,  .004,
422                                      .604,  .197, -.038,  .168, -.022,  .275, -.028,
423                                             .252,  .111, -.030,  .019, -.010, -.010,
424                                                    .148,  .042,  .117,  .012,  .111,
425                                                           .262,  .145,  .277,  .174,
426                                                                  .154,  .120,  .168,
427                                                                         .315,  .148,
428                                                                                .187
429         };
430 
431         // Check against reference data and verify symmetry
432         int k = 0;
433         for (int i = 0; i < 10; i++) {
434             for (int j = i; j < 10; j++) {
435                 assertEquals(referenceData[k], hat.getEntry(i, j), 10e-3);
436                 assertEquals(hat.getEntry(i, j), hat.getEntry(j, i), 10e-12);
437                 k++;
438             }
439         }
440 
441         /*
442          * Verify that residuals computed using the hat matrix are close to
443          * what we get from direct computation, i.e. r = (I - H) y
444          */
445         double[] residuals = model.estimateResiduals();
446         RealMatrix I = MatrixUtils.createRealIdentityMatrix(10);
447         double[] hatResiduals = I.subtract(hat).operate(model.getY()).toArray();
448         UnitTestUtils.customAssertEquals(residuals, hatResiduals, 10e-12);
449     }
450 
451     /**
452      * test calculateYVariance
453      */
454     @Test
455     void testYVariance() {
456 
457         // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
458 
459         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
460         model.newSampleData(y, x);
461         UnitTestUtils.customAssertEquals(model.calculateYVariance(), 3.5, 0);
462     }
463 
464     /**
465      * Verifies that calculateYVariance and calculateResidualVariance return consistent
466      * values with direct variance computation from Y, residuals, respectively.
467      */
468     protected void checkVarianceConsistency(OLSMultipleLinearRegression model) {
469         // Check Y variance consistency
470         UnitTestUtils.customAssertEquals(StatUtils.variance(model.getY().toArray()), model.calculateYVariance(), 0);
471 
472         // Check residual variance consistency
473         double[] residuals = model.calculateResiduals().toArray();
474         RealMatrix X = model.getX();
475         UnitTestUtils.customAssertEquals(
476                 StatUtils.variance(model.calculateResiduals().toArray()) * (residuals.length - 1),
477                 model.calculateErrorVariance() * (X.getRowDimension() - X.getColumnDimension()), 1E-20);
478 
479     }
480 
481     /**
482      * Verifies that setting X and Y separately has the same effect as newSample(X,Y).
483      */
484     @Test
485     void testNewSample2() {
486         double[] y = new double[] {1, 2, 3, 4};
487         double[][] x = new double[][] {
488           {19, 22, 33},
489           {20, 30, 40},
490           {25, 35, 45},
491           {27, 37, 47}
492         };
493         OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
494         regression.newSampleData(y, x);
495         RealMatrix combinedX = regression.getX().copy();
496         RealVector combinedY = regression.getY().copy();
497         regression.newXSampleData(x);
498         regression.newYSampleData(y);
499         assertEquals(combinedX, regression.getX());
500         assertEquals(combinedY, regression.getY());
501 
502         // No intercept
503         regression.setNoIntercept(true);
504         regression.newSampleData(y, x);
505         combinedX = regression.getX().copy();
506         combinedY = regression.getY().copy();
507         regression.newXSampleData(x);
508         regression.newYSampleData(y);
509         assertEquals(combinedX, regression.getX());
510         assertEquals(combinedY, regression.getY());
511     }
512 
513     @Test
514     void testNewSampleDataYNull() {
515         assertThrows(NullArgumentException.class, () -> {
516             createRegression().newSampleData(null, new double[][]{});
517         });
518     }
519 
520     @Test
521     void testNewSampleDataXNull() {
522         assertThrows(NullArgumentException.class, () -> {
523             createRegression().newSampleData(new double[]{}, null);
524         });
525     }
526 
527     /*
528     * This is a test based on the Wampler1 data set
529     * http://www.itl.nist.gov/div898/strd/lls/data/Wampler1.shtml
530     */
531     @Test
532     void testWampler1() {
533         double[] data = new double[]{
534             1, 0,
535             6, 1,
536             63, 2,
537             364, 3,
538             1365, 4,
539             3906, 5,
540             9331, 6,
541             19608, 7,
542             37449, 8,
543             66430, 9,
544             111111, 10,
545             177156, 11,
546             271453, 12,
547             402234, 13,
548             579195, 14,
549             813616, 15,
550             1118481, 16,
551             1508598, 17,
552             2000719, 18,
553             2613660, 19,
554             3368421, 20};
555         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
556 
557 
558         final int nvars = 5;
559         final int nobs = 21;
560         double[] tmp = new double[(nvars + 1) * nobs];
561         int off = 0;
562         int off2 = 0;
563         for (int i = 0; i < nobs; i++) {
564             tmp[off2] = data[off];
565             tmp[off2 + 1] = data[off + 1];
566             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
567             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
568             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
569             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
570             off2 += (nvars + 1);
571             off += 2;
572         }
573         model.newSampleData(tmp, nobs, nvars);
574         double[] betaHat = model.estimateRegressionParameters();
575         UnitTestUtils.customAssertEquals(betaHat,
576                                          new double[]{1.0,
577                     1.0, 1.0,
578                     1.0, 1.0,
579                     1.0}, 1E-8);
580 
581         double[] se = model.estimateRegressionParametersStandardErrors();
582         UnitTestUtils.customAssertEquals(se,
583                                          new double[]{0.0,
584                     0.0, 0.0,
585                     0.0, 0.0,
586                     0.0}, 1E-8);
587 
588         UnitTestUtils.customAssertEquals(1.0, model.calculateRSquared(), 1.0e-10);
589         UnitTestUtils.customAssertEquals(0, model.estimateErrorVariance(), 1.0e-7);
590         UnitTestUtils.customAssertEquals(0.00, model.calculateResidualSumOfSquares(), 1.0e-6);
591 
592         return;
593     }
594 
595     /*
596      * This is a test based on the Wampler2 data set
597      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler2.shtml
598      */
599     @Test
600     void testWampler2() {
601         double[] data = new double[]{
602             1.00000, 0,
603             1.11111, 1,
604             1.24992, 2,
605             1.42753, 3,
606             1.65984, 4,
607             1.96875, 5,
608             2.38336, 6,
609             2.94117, 7,
610             3.68928, 8,
611             4.68559, 9,
612             6.00000, 10,
613             7.71561, 11,
614             9.92992, 12,
615             12.75603, 13,
616             16.32384, 14,
617             20.78125, 15,
618             26.29536, 16,
619             33.05367, 17,
620             41.26528, 18,
621             51.16209, 19,
622             63.00000, 20};
623         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
624 
625 
626         final int nvars = 5;
627         final int nobs = 21;
628         double[] tmp = new double[(nvars + 1) * nobs];
629         int off = 0;
630         int off2 = 0;
631         for (int i = 0; i < nobs; i++) {
632             tmp[off2] = data[off];
633             tmp[off2 + 1] = data[off + 1];
634             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
635             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
636             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
637             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
638             off2 += (nvars + 1);
639             off += 2;
640         }
641         model.newSampleData(tmp, nobs, nvars);
642         double[] betaHat = model.estimateRegressionParameters();
643         UnitTestUtils.customAssertEquals(betaHat,
644                                          new double[]{
645                     1.0,
646                     1.0e-1,
647                     1.0e-2,
648                     1.0e-3, 1.0e-4,
649                     1.0e-5}, 1E-8);
650 
651         double[] se = model.estimateRegressionParametersStandardErrors();
652         UnitTestUtils.customAssertEquals(se,
653                                          new double[]{0.0,
654                     0.0, 0.0,
655                     0.0, 0.0,
656                     0.0}, 1E-8);
657         UnitTestUtils.customAssertEquals(1.0, model.calculateRSquared(), 1.0e-10);
658         UnitTestUtils.customAssertEquals(0, model.estimateErrorVariance(), 1.0e-7);
659         UnitTestUtils.customAssertEquals(0.00, model.calculateResidualSumOfSquares(), 1.0e-6);
660         return;
661     }
662 
663     /*
664      * This is a test based on the Wampler3 data set
665      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler3.shtml
666      */
667     @Test
668     void testWampler3() {
669         double[] data = new double[]{
670             760, 0,
671             -2042, 1,
672             2111, 2,
673             -1684, 3,
674             3888, 4,
675             1858, 5,
676             11379, 6,
677             17560, 7,
678             39287, 8,
679             64382, 9,
680             113159, 10,
681             175108, 11,
682             273291, 12,
683             400186, 13,
684             581243, 14,
685             811568, 15,
686             1121004, 16,
687             1506550, 17,
688             2002767, 18,
689             2611612, 19,
690             3369180, 20};
691 
692         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
693         final int nvars = 5;
694         final int nobs = 21;
695         double[] tmp = new double[(nvars + 1) * nobs];
696         int off = 0;
697         int off2 = 0;
698         for (int i = 0; i < nobs; i++) {
699             tmp[off2] = data[off];
700             tmp[off2 + 1] = data[off + 1];
701             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
702             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
703             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
704             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
705             off2 += (nvars + 1);
706             off += 2;
707         }
708         model.newSampleData(tmp, nobs, nvars);
709         double[] betaHat = model.estimateRegressionParameters();
710         UnitTestUtils.customAssertEquals(betaHat,
711                                          new double[]{
712                     1.0,
713                     1.0,
714                     1.0,
715                     1.0,
716                     1.0,
717                     1.0}, 1E-8);
718 
719         double[] se = model.estimateRegressionParametersStandardErrors();
720         UnitTestUtils.customAssertEquals(se,
721                                          new double[]{2152.32624678170,
722                     2363.55173469681, 779.343524331583,
723                     101.475507550350, 5.64566512170752,
724                     0.112324854679312}, 1E-8); //
725 
726         UnitTestUtils.customAssertEquals(.999995559025820, model.calculateRSquared(), 1.0e-10);
727         UnitTestUtils.customAssertEquals(5570284.53333333, model.estimateErrorVariance(), 1.0e-6);
728         UnitTestUtils.customAssertEquals(83554268.0000000, model.calculateResidualSumOfSquares(), 1.0e-5);
729         return;
730     }
731 
732     /*
733      * This is a test based on the Wampler4 data set
734      * http://www.itl.nist.gov/div898/strd/lls/data/Wampler4.shtml
735      */
736     @Test
737     void testWampler4() {
738         double[] data = new double[]{
739             75901, 0,
740             -204794, 1,
741             204863, 2,
742             -204436, 3,
743             253665, 4,
744             -200894, 5,
745             214131, 6,
746             -185192, 7,
747             221249, 8,
748             -138370, 9,
749             315911, 10,
750             -27644, 11,
751             455253, 12,
752             197434, 13,
753             783995, 14,
754             608816, 15,
755             1370781, 16,
756             1303798, 17,
757             2205519, 18,
758             2408860, 19,
759             3444321, 20};
760 
761         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
762         final int nvars = 5;
763         final int nobs = 21;
764         double[] tmp = new double[(nvars + 1) * nobs];
765         int off = 0;
766         int off2 = 0;
767         for (int i = 0; i < nobs; i++) {
768             tmp[off2] = data[off];
769             tmp[off2 + 1] = data[off + 1];
770             tmp[off2 + 2] = tmp[off2 + 1] * tmp[off2 + 1];
771             tmp[off2 + 3] = tmp[off2 + 1] * tmp[off2 + 2];
772             tmp[off2 + 4] = tmp[off2 + 1] * tmp[off2 + 3];
773             tmp[off2 + 5] = tmp[off2 + 1] * tmp[off2 + 4];
774             off2 += (nvars + 1);
775             off += 2;
776         }
777         model.newSampleData(tmp, nobs, nvars);
778         double[] betaHat = model.estimateRegressionParameters();
779         UnitTestUtils.customAssertEquals(betaHat,
780                                          new double[]{
781                     1.0,
782                     1.0,
783                     1.0,
784                     1.0,
785                     1.0,
786                     1.0}, 1E-6);
787 
788         double[] se = model.estimateRegressionParametersStandardErrors();
789         UnitTestUtils.customAssertEquals(se,
790                                          new double[]{215232.624678170,
791                     236355.173469681, 77934.3524331583,
792                     10147.5507550350, 564.566512170752,
793                     11.2324854679312}, 1E-8);
794 
795         UnitTestUtils.customAssertEquals(.957478440825662, model.calculateRSquared(), 1.0e-10);
796         UnitTestUtils.customAssertEquals(55702845333.3333, model.estimateErrorVariance(), 1.0e-4);
797         UnitTestUtils.customAssertEquals(835542680000.000, model.calculateResidualSumOfSquares(), 1.0e-3);
798         return;
799     }
800 
801     /**
802      * Anything requiring beta calculation should advertise SME.
803      */
804     @Test
805     void testSingularCalculateBeta() {
806         assertThrows(MathIllegalArgumentException.class, () -> {
807             OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
808             model.newSampleData(new double[]{1, 2, 3, 1, 2, 3, 1, 2, 3}, 3, 2);
809             model.calculateBeta();
810         });
811     }
812 
813     @Test
814     void testNoSSTOCalculateRsquare() {
815         OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
816         model.newSampleData(new double[] {1,  2,  3, 1, 7, 8, 1, 10, 12}, 3, 2);
817         assertTrue(Double.isNaN(model.calculateRSquared()));
818     }
819 
820     @Test
821     void testNoDataNPECalculateBeta() {
822         assertThrows(NullPointerException.class, () -> {
823             OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
824             model.calculateBeta();
825         });
826     }
827 
828     @Test
829     void testNoDataNPECalculateHat() {
830         assertThrows(NullPointerException.class, () -> {
831             OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
832             model.calculateHat();
833         });
834     }
835 
836     @Test
837     void testNoDataNPESSTO() {
838         assertThrows(NullPointerException.class, () -> {
839             OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
840             model.calculateTotalSumOfSquares();
841         });
842     }
843 
844     /**
845      * From <a href="https://stackoverflow.com/questions/37320008/ols-multiple-linear-regression-with-commons-math">OLS
846      * Multiple Linear Regression with commons-math</a>
847      */
848     @Test
849     void testNewSampleDataNoIntercept() {
850         final double[][] x = { { 1, 0, 0, 0 }, { 0, 1, 0, 0 }, { 0, 0, 1, 0}, {  0, 0, 0, 1 } };
851         final double[] y = { 1, 2, 3, 4 };
852 
853         final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
854         regression.setNoIntercept(true);
855         regression.newSampleData(y, x);
856 
857         final double[] b = regression.estimateRegressionParameters();
858         for (int i = 0; i < y.length; i++) {
859             assertEquals(b[i], y[i], 0.001);
860         }
861     }
862 }