/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is not the original file distributed by the Apache Software Foundation
 * It has been modified by the Hipparchus project
 */
package org.hipparchus.stat.regression;

import org.hipparchus.UnitTestUtils;
import org.hipparchus.exception.MathIllegalArgumentException;
import org.hipparchus.exception.NullArgumentException;
import org.hipparchus.linear.MatrixUtils;
import org.hipparchus.linear.RealMatrix;
import org.hipparchus.linear.RealVector;
import org.hipparchus.random.CorrelatedRandomVectorGenerator;
import org.hipparchus.random.GaussianRandomGenerator;
import org.hipparchus.random.JDKRandomGenerator;
import org.hipparchus.random.RandomGenerator;
import org.hipparchus.stat.correlation.Covariance;
import org.hipparchus.stat.descriptive.DescriptiveStatistics;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

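/**
 * Tests for {@link GLSMultipleLinearRegression}. GLS generalizes OLS by
 * weighting with the inverse of the error covariance matrix Omega; the
 * estimator is beta = (X^T Omega^-1 X)^-1 X^T Omega^-1 y.
 */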
class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {

    private double[] y;
    private double[][] x;
    private double[][] omega;
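    /**
     * Longley regression dataset, flattened row-major: each of the 16 rows
     * holds the response y followed by its six regressors (year last), in
     * the layout expected by newSampleData(data, nObs, nVars).
     */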
    private final double[] longley = new double[] {
            60323,83.0,234289,2356,1590,107608,1947,
            61122,88.5,259426,2325,1456,108632,1948,
            60171,88.2,258054,3682,1616,109773,1949,
            61187,89.5,284599,3351,1650,110929,1950,
            63221,96.2,328975,2099,3099,112075,1951,
            63639,98.1,346999,1932,3594,113270,1952,
            64989,99.0,365385,1870,3547,115094,1953,
            63761,100.0,363112,3578,3350,116219,1954,
            66019,101.2,397469,2904,3048,117388,1955,
            67857,104.6,419180,2822,2857,118734,1956,
            68169,108.4,442769,2936,2798,120445,1957,
            66513,110.8,444546,4681,2637,121950,1958,
            68655,112.6,482704,3813,2552,123366,1959,
            69564,114.2,502601,3931,2514,125368,1960,
            69331,115.7,518173,4806,2572,127852,1961,
            70551,116.9,554894,4007,2827,130081,1962
        };

    @BeforeEach
    @Override
    public void setUp(){
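        // Small fixture: 6 observations of 5 regressors (the model adds an
        // intercept term by default) and a 6x6 diagonal error covariance omega.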
        y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
        x = new double[6][];
        x[0] = new double[]{0, 0, 0, 0, 0};
        x[1] = new double[]{2.0, 0, 0, 0, 0};
        x[2] = new double[]{0, 3.0, 0, 0, 0};
        x[3] = new double[]{0, 0, 4.0, 0, 0};
        x[4] = new double[]{0, 0, 0, 5.0, 0};
        x[5] = new double[]{0, 0, 0, 0, 6.0};
        omega = new double[6][];
        omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
        omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
        omega[2] = new double[]{0, 0, 3.0, 0, 0, 0};
        omega[3] = new double[]{0, 0, 0, 4.0, 0, 0};
        omega[4] = new double[]{0, 0, 0, 0, 5.0, 0};
        omega[5] = new double[]{0, 0, 0, 0, 0, 6.0};
        super.setUp();
    }

    @Test
    void cannotAddXSampleData() {
        assertThrows(NullArgumentException.class, () -> {
            createRegression().newSampleData(new double[]{}, null, null);
        });
    }

    @Test
    void cannotAddNullYSampleData() {
        assertThrows(NullArgumentException.class, () -> {
            createRegression().newSampleData(null, new double[][]{}, null);
        });
    }

    @Test
    void cannotAddSampleDataWithSizeMismatch() {
        assertThrows(MathIllegalArgumentException.class, () -> {
            double[] y = new double[]{1.0, 2.0};
            double[][] x = new double[1][];
            x[0] = new double[]{1.0, 0};
            createRegression().newSampleData(y, x, null);
        });
    }

    @Test
    void cannotAddNullCovarianceData() {
        assertThrows(MathIllegalArgumentException.class, () -> {
            createRegression().newSampleData(new double[]{}, new double[][]{}, null);
        });
    }

    @Test
    void notEnoughData() {
        assertThrows(MathIllegalArgumentException.class, () -> {
            double[]   reducedY = new double[y.length - 1];
            double[][] reducedX = new double[x.length - 1][];
            double[][] reducedO = new double[omega.length - 1][];
            System.arraycopy(y, 0, reducedY, 0, reducedY.length);
            System.arraycopy(x, 0, reducedX, 0, reducedX.length);
            System.arraycopy(omega, 0, reducedO, 0, reducedO.length);
            createRegression().newSampleData(reducedY, reducedX, reducedO);
        });
    }

    @Test
    void cannotAddCovarianceDataWithSampleSizeMismatch() {
        assertThrows(MathIllegalArgumentException.class, () -> {
            double[] y = new double[]{1.0, 2.0};
            double[][] x = new double[2][];
            x[0] = new double[]{1.0, 0};
            x[1] = new double[]{0, 1.0};
            double[][] omega = new double[1][];
            omega[0] = new double[]{1.0, 0};
            createRegression().newSampleData(y, x, omega);
        });
    }

    @Test
    void cannotAddCovarianceDataThatIsNotSquare() {
        assertThrows(MathIllegalArgumentException.class, () -> {
            double[] y = new double[]{1.0, 2.0};
            double[][] x = new double[2][];
            x[0] = new double[]{1.0, 0};
            x[1] = new double[]{0, 1.0};
            double[][] omega = new double[3][];
            omega[0] = new double[]{1.0, 0};
            omega[1] = new double[]{0, 1.0};
            omega[2] = new double[]{0, 2.0};
            createRegression().newSampleData(y, x, omega);
        });
    }

    @Override
    protected GLSMultipleLinearRegression createRegression() {
        GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
        regression.newSampleData(y, x, omega);
        return regression;
    }

    @Override
    protected int getNumberOfRegressors() {
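        // 5 regressor columns in x plus one for the intercept term, which the
        // model includes by default.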
        return x[0].length + 1;
    }

    @Override
    protected int getSampleSize() {
        return y.length;
    }

    /**
     * test calculateYVariance
     */
    @Test
    void testYVariance() {

        // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
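        // Sample variance of {11,...,16}: the mean is 13.5, the sum of squared
        // deviations is 2 * (2.5^2 + 1.5^2 + 0.5^2) = 17.5, and dividing by
        // n - 1 = 5 gives 3.5.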

        GLSMultipleLinearRegression model = new GLSMultipleLinearRegression();
        model.newSampleData(y, x, omega);
        UnitTestUtils.customAssertEquals(model.calculateYVariance(), 3.5, 0);
    }

    /**
     * Verifies that setting X and Y separately has the same effect as
     * newSampleData(y, x, covariance).
     */
    @Test
    void testNewSample2() {
        double[] y = new double[] {1, 2, 3, 4};
        double[][] x = new double[][] {
          {19, 22, 33},
          {20, 30, 40},
          {25, 35, 45},
          {27, 37, 47}
        };
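        // Omega = 2 * I_4: uncorrelated errors with a common variance of 2.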
        double[][] covariance = MatrixUtils.createRealIdentityMatrix(4).scalarMultiply(2).getData();
        GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
        regression.newSampleData(y, x, covariance);
        RealMatrix combinedX = regression.getX().copy();
        RealVector combinedY = regression.getY().copy();
        RealMatrix combinedCovInv = regression.getOmegaInverse();
        regression.newXSampleData(x);
        regression.newYSampleData(y);
        assertEquals(combinedX, regression.getX());
        assertEquals(combinedY, regression.getY());
        assertEquals(combinedCovInv, regression.getOmegaInverse());
    }

    /**
     * Verifies that GLS with identity covariance matrix gives the same results
     * as OLS.
     */
    @Test
    void testGLSOLSConsistency() {
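        // With Omega = I, the GLS estimator (X^T Omega^-1 X)^-1 X^T Omega^-1 y
        // reduces to the OLS estimator (X^T X)^-1 X^T y, so the two beta
        // vectors should agree up to numerical error.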
        RealMatrix identityCov = MatrixUtils.createRealIdentityMatrix(16);
        GLSMultipleLinearRegression glsModel = new GLSMultipleLinearRegression();
        OLSMultipleLinearRegression olsModel = new OLSMultipleLinearRegression();
        glsModel.newSampleData(longley, 16, 6);
        olsModel.newSampleData(longley, 16, 6);
        glsModel.newCovarianceData(identityCov.getData());
        double[] olsBeta = olsModel.calculateBeta().toArray();
        double[] glsBeta = glsModel.calculateBeta().toArray();
        // TODO:  Should have assertRelativelyEquals(double[], double[], eps) in TestUtils
        //        Should also add RealVector and RealMatrix versions
        for (int i = 0; i < olsBeta.length; i++) {
            UnitTestUtils.customAssertRelativelyEquals(olsBeta[i], glsBeta[i], 10E-7);
        }
    }

    /**
     * Generate an error covariance matrix and sample data representing models
     * with this error structure. Then verify that GLS estimated coefficients,
     * on average, perform better than OLS.
     */
    @Test
    void testGLSEfficiency() {
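        // By the Aitken/Gauss-Markov result, when the error covariance is known
        // the GLS estimator is the best linear unbiased estimator, so over many
        // simulated models its estimates should scatter less around the true
        // beta than the OLS estimates do.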
        RandomGenerator rg = new JDKRandomGenerator();
        rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance

        // Assume model has 16 observations (will use Longley data).  Start by generating
        // non-constant variances for the 16 error terms.
        final int nObs = 16;
        double[] sigma = new double[nObs];
        for (int i = 0; i < nObs; i++) {
            sigma[i] = 10 * rg.nextDouble();
        }

        // Now generate 1000 error vectors to use to estimate the covariance matrix
        // Column j holds draws from N(0, sigma[j]^2), i.e. sigma[j] is the standard deviation
        final int numSeeds = 1000;
        RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
        for (int i = 0; i < numSeeds; i++) {
            for (int j = 0; j < nObs; j++) {
                errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
            }
        }

        // Get covariance matrix for columns
        RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();

        // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
        GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
        double[] errorMeans = new double[nObs];  // Counting on init to 0 here
        CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
                1.0e-12 * cov.getNorm1(), rawGenerator);
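        // gen.nextVector() now produces zero-mean Gaussian vectors whose covariance
        // approximates cov; the 1.0e-12 * norm argument is a small tolerance used when
        // decomposing cov, below which directions are treated as numerically negligible.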

        // Now start generating models.  Use the Longley X matrix as the design
        // matrix and the Longley OLS beta vector as the "true" beta.  Generate
        // Y values by Xb + u where u is a correlated random vector generated
        // from cov.
        OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
        ols.newSampleData(longley, nObs, 6);
        final RealVector b = ols.calculateBeta().copy();
        final RealMatrix x = ols.getX().copy();

        // Create a GLS model to reuse
        GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
        gls.newSampleData(longley, nObs, 6);
        gls.newCovarianceData(cov.getData());

        // Create aggregators for stats measuring model performance
        DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
        DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();

        // Generate Y vectors for 10000 models, estimate GLS and OLS and
        // verify that the GLS estimates are, on average, better
        final int nModels = 10000;
        for (int i = 0; i < nModels; i++) {

            // Generate y = Xb + u with u drawn from the correlated error distribution
            RealVector u = MatrixUtils.createRealVector(gen.nextVector());
            double[] y = u.add(x.operate(b)).toArray();

            // Estimate OLS parameters
            ols.newYSampleData(y);
            RealVector olsBeta = ols.calculateBeta();

            // Estimate GLS parameters
            gls.newYSampleData(y);
            RealVector glsBeta = gls.calculateBeta();

            // Record deviations from "true" beta
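            // dist^2 is the squared Euclidean distance ||betaHat - b||^2; its mean
            // over the nModels runs estimates the total mean squared error of each
            // estimator about the true beta.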
            double dist = olsBeta.getDistance(b);
            olsBetaStats.addValue(dist * dist);
            dist = glsBeta.getDistance(b);
            glsBetaStats.addValue(dist * dist);

        }

        // Verify that GLS is on average more efficient: its squared errors have
        // both a lower mean and a lower standard deviation than the OLS ones
        assertTrue(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
        assertTrue(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
    }

}