View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.regression;
23  
24  import org.hipparchus.UnitTestUtils;
25  import org.hipparchus.exception.MathIllegalArgumentException;
26  import org.hipparchus.exception.NullArgumentException;
27  import org.hipparchus.linear.MatrixUtils;
28  import org.hipparchus.linear.RealMatrix;
29  import org.hipparchus.linear.RealVector;
30  import org.hipparchus.random.CorrelatedRandomVectorGenerator;
31  import org.hipparchus.random.GaussianRandomGenerator;
32  import org.hipparchus.random.JDKRandomGenerator;
33  import org.hipparchus.random.RandomGenerator;
34  import org.hipparchus.stat.correlation.Covariance;
35  import org.hipparchus.stat.descriptive.DescriptiveStatistics;
36  import org.junit.Assert;
37  import org.junit.Before;
38  import org.junit.Test;
39  
40  public class GLSMultipleLinearRegressionTest extends MultipleLinearRegressionAbstractTest {
41  
42      private double[] y;
43      private double[][] x;
44      private double[][] omega;
45      private final double[] longley = new double[] {
46              60323,83.0,234289,2356,1590,107608,1947,
47              61122,88.5,259426,2325,1456,108632,1948,
48              60171,88.2,258054,3682,1616,109773,1949,
49              61187,89.5,284599,3351,1650,110929,1950,
50              63221,96.2,328975,2099,3099,112075,1951,
51              63639,98.1,346999,1932,3594,113270,1952,
52              64989,99.0,365385,1870,3547,115094,1953,
53              63761,100.0,363112,3578,3350,116219,1954,
54              66019,101.2,397469,2904,3048,117388,1955,
55              67857,104.6,419180,2822,2857,118734,1956,
56              68169,108.4,442769,2936,2798,120445,1957,
57              66513,110.8,444546,4681,2637,121950,1958,
58              68655,112.6,482704,3813,2552,123366,1959,
59              69564,114.2,502601,3931,2514,125368,1960,
60              69331,115.7,518173,4806,2572,127852,1961,
61              70551,116.9,554894,4007,2827,130081,1962
62          };
63  
64      @Before
65      @Override
66      public void setUp(){
67          y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
68          x = new double[6][];
69          x[0] = new double[]{0, 0, 0, 0, 0};
70          x[1] = new double[]{2.0, 0, 0, 0, 0};
71          x[2] = new double[]{0, 3.0, 0, 0, 0};
72          x[3] = new double[]{0, 0, 4.0, 0, 0};
73          x[4] = new double[]{0, 0, 0, 5.0, 0};
74          x[5] = new double[]{0, 0, 0, 0, 6.0};
75          omega = new double[6][];
76          omega[0] = new double[]{1.0, 0, 0, 0, 0, 0};
77          omega[1] = new double[]{0, 2.0, 0, 0, 0, 0};
78          omega[2] = new double[]{0, 0, 3.0, 0, 0, 0};
79          omega[3] = new double[]{0, 0, 0, 4.0, 0, 0};
80          omega[4] = new double[]{0, 0, 0, 0, 5.0, 0};
81          omega[5] = new double[]{0, 0, 0, 0, 0, 6.0};
82          super.setUp();
83      }
84  
85      @Test(expected=NullArgumentException.class)
86      public void cannotAddXSampleData() {
87          createRegression().newSampleData(new double[]{}, null, null);
88      }
89  
90      @Test(expected=NullArgumentException.class)
91      public void cannotAddNullYSampleData() {
92          createRegression().newSampleData(null, new double[][]{}, null);
93      }
94  
95      @Test(expected=MathIllegalArgumentException.class)
96      public void cannotAddSampleDataWithSizeMismatch() {
97          double[] y = new double[]{1.0, 2.0};
98          double[][] x = new double[1][];
99          x[0] = new double[]{1.0, 0};
100         createRegression().newSampleData(y, x, null);
101     }
102 
103     @Test(expected=MathIllegalArgumentException.class)
104     public void cannotAddNullCovarianceData() {
105         createRegression().newSampleData(new double[]{}, new double[][]{}, null);
106     }
107 
108     @Test(expected=MathIllegalArgumentException.class)
109     public void notEnoughData() {
110         double[]   reducedY = new double[y.length - 1];
111         double[][] reducedX = new double[x.length - 1][];
112         double[][] reducedO = new double[omega.length - 1][];
113         System.arraycopy(y,     0, reducedY, 0, reducedY.length);
114         System.arraycopy(x,     0, reducedX, 0, reducedX.length);
115         System.arraycopy(omega, 0, reducedO, 0, reducedO.length);
116         createRegression().newSampleData(reducedY, reducedX, reducedO);
117     }
118 
119     @Test(expected=MathIllegalArgumentException.class)
120     public void cannotAddCovarianceDataWithSampleSizeMismatch() {
121         double[] y = new double[]{1.0, 2.0};
122         double[][] x = new double[2][];
123         x[0] = new double[]{1.0, 0};
124         x[1] = new double[]{0, 1.0};
125         double[][] omega = new double[1][];
126         omega[0] = new double[]{1.0, 0};
127         createRegression().newSampleData(y, x, omega);
128     }
129 
130     @Test(expected=MathIllegalArgumentException.class)
131     public void cannotAddCovarianceDataThatIsNotSquare() {
132         double[] y = new double[]{1.0, 2.0};
133         double[][] x = new double[2][];
134         x[0] = new double[]{1.0, 0};
135         x[1] = new double[]{0, 1.0};
136         double[][] omega = new double[3][];
137         omega[0] = new double[]{1.0, 0};
138         omega[1] = new double[]{0, 1.0};
139         omega[2] = new double[]{0, 2.0};
140         createRegression().newSampleData(y, x, omega);
141     }
142 
143     @Override
144     protected GLSMultipleLinearRegression createRegression() {
145         GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
146         regression.newSampleData(y, x, omega);
147         return regression;
148     }
149 
150     @Override
151     protected int getNumberOfRegressors() {
152         return x[0].length + 1;
153     }
154 
155     @Override
156     protected int getSampleSize() {
157         return y.length;
158     }
159 
160     /**
161      * test calculateYVariance
162      */
163     @Test
164     public void testYVariance() {
165 
166         // assumes: y = new double[]{11.0, 12.0, 13.0, 14.0, 15.0, 16.0};
167 
168         GLSMultipleLinearRegression model = new GLSMultipleLinearRegression();
169         model.newSampleData(y, x, omega);
170         UnitTestUtils.assertEquals(model.calculateYVariance(), 3.5, 0);
171     }
172 
173     /**
174      * Verifies that setting X, Y and covariance separately has the same effect as newSample(X,Y,cov).
175      */
176     @Test
177     public void testNewSample2() {
178         double[] y = new double[] {1, 2, 3, 4};
179         double[][] x = new double[][] {
180           {19, 22, 33},
181           {20, 30, 40},
182           {25, 35, 45},
183           {27, 37, 47}
184         };
185         double[][] covariance = MatrixUtils.createRealIdentityMatrix(4).scalarMultiply(2).getData();
186         GLSMultipleLinearRegression regression = new GLSMultipleLinearRegression();
187         regression.newSampleData(y, x, covariance);
188         RealMatrix combinedX = regression.getX().copy();
189         RealVector combinedY = regression.getY().copy();
190         RealMatrix combinedCovInv = regression.getOmegaInverse();
191         regression.newXSampleData(x);
192         regression.newYSampleData(y);
193         Assert.assertEquals(combinedX, regression.getX());
194         Assert.assertEquals(combinedY, regression.getY());
195         Assert.assertEquals(combinedCovInv, regression.getOmegaInverse());
196     }
197 
198     /**
199      * Verifies that GLS with identity covariance matrix gives the same results
200      * as OLS.
201      */
202     @Test
203     public void testGLSOLSConsistency() {
204         RealMatrix identityCov = MatrixUtils.createRealIdentityMatrix(16);
205         GLSMultipleLinearRegression glsModel = new GLSMultipleLinearRegression();
206         OLSMultipleLinearRegression olsModel = new OLSMultipleLinearRegression();
207         glsModel.newSampleData(longley, 16, 6);
208         olsModel.newSampleData(longley, 16, 6);
209         glsModel.newCovarianceData(identityCov.getData());
210         double[] olsBeta = olsModel.calculateBeta().toArray();
211         double[] glsBeta = glsModel.calculateBeta().toArray();
212         // TODO:  Should have assertRelativelyEquals(double[], double[], eps) in TestUtils
213         //        Should also add RealVector and RealMatrix versions
214         for (int i = 0; i < olsBeta.length; i++) {
215             UnitTestUtils.assertRelativelyEquals(olsBeta[i], glsBeta[i], 10E-7);
216         }
217     }
218 
219     /**
220      * Generate an error covariance matrix and sample data representing models
221      * with this error structure. Then verify that GLS estimated coefficients,
222      * on average, perform better than OLS.
223      */
224     @Test
225     public void testGLSEfficiency() {
226         RandomGenerator rg = new JDKRandomGenerator();
227         rg.setSeed(200);  // Seed has been selected to generate non-trivial covariance
228 
229         // Assume model has 16 observations (will use Longley data).  Start by generating
230         // non-constant variances for the 16 error terms.
231         final int nObs = 16;
232         double[] sigma = new double[nObs];
233         for (int i = 0; i < nObs; i++) {
234             sigma[i] = 10 * rg.nextDouble();
235         }
236 
237         // Now generate 1000 error vectors to use to estimate the covariance matrix
238         // Columns are draws on N(0, sigma[col])
239         final int numSeeds = 1000;
240         RealMatrix errorSeeds = MatrixUtils.createRealMatrix(numSeeds, nObs);
241         for (int i = 0; i < numSeeds; i++) {
242             for (int j = 0; j < nObs; j++) {
243                 errorSeeds.setEntry(i, j, rg.nextGaussian() * sigma[j]);
244             }
245         }
246 
247         // Get covariance matrix for columns
248         RealMatrix cov = (new Covariance(errorSeeds)).getCovarianceMatrix();
249 
250         // Create a CorrelatedRandomVectorGenerator to use to generate correlated errors
251         GaussianRandomGenerator rawGenerator = new GaussianRandomGenerator(rg);
252         double[] errorMeans = new double[nObs];  // Counting on init to 0 here
253         CorrelatedRandomVectorGenerator gen = new CorrelatedRandomVectorGenerator(errorMeans, cov,
254          1.0e-12 * cov.getNorm1(), rawGenerator);
255 
256         // Now start generating models.  Use Longley X matrix on LHS
257         // and Longley OLS beta vector as "true" beta.  Generate
258         // Y values by XB + u where u is a CorrelatedRandomVector generated
259         // from cov.
260         OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
261         ols.newSampleData(longley, nObs, 6);
262         final RealVector b = ols.calculateBeta().copy();
263         final RealMatrix x = ols.getX().copy();
264 
265         // Create a GLS model to reuse
266         GLSMultipleLinearRegression gls = new GLSMultipleLinearRegression();
267         gls.newSampleData(longley, nObs, 6);
268         gls.newCovarianceData(cov.getData());
269 
270         // Create aggregators for stats measuring model performance
271         DescriptiveStatistics olsBetaStats = new DescriptiveStatistics();
272         DescriptiveStatistics glsBetaStats = new DescriptiveStatistics();
273 
274         // Generate Y vectors for 10000 models, estimate GLS and OLS and
275         // Verify that OLS estimates are better
276         final int nModels = 10000;
277         for (int i = 0; i < nModels; i++) {
278 
279             // Generate y = xb + u with u cov
280             RealVector u = MatrixUtils.createRealVector(gen.nextVector());
281             double[] y = u.add(x.operate(b)).toArray();
282 
283             // Estimate OLS parameters
284             ols.newYSampleData(y);
285             RealVector olsBeta = ols.calculateBeta();
286 
287             // Estimate GLS parameters
288             gls.newYSampleData(y);
289             RealVector glsBeta = gls.calculateBeta();
290 
291             // Record deviations from "true" beta
292             double dist = olsBeta.getDistance(b);
293             olsBetaStats.addValue(dist * dist);
294             dist = glsBeta.getDistance(b);
295             glsBetaStats.addValue(dist * dist);
296 
297         }
298 
299         // Verify that GLS is on average more efficient, lower variance
300         assert(olsBetaStats.getMean() > 1.5 * glsBetaStats.getMean());
301         assert(olsBetaStats.getStandardDeviation() > glsBetaStats.getStandardDeviation());
302     }
303 
304 }