View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.correlation;
23  
24  import org.hipparchus.UnitTestUtils;
25  import org.hipparchus.distribution.continuous.TDistribution;
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.linear.BlockRealMatrix;
28  import org.hipparchus.linear.RealMatrix;
29  import org.hipparchus.util.FastMath;
30  import org.junit.jupiter.api.Test;
31  
32  import static org.junit.jupiter.api.Assertions.assertEquals;
33  import static org.junit.jupiter.api.Assertions.assertTrue;
34  import static org.junit.jupiter.api.Assertions.fail;
35  
36  
37  public class PearsonsCorrelationTest {
38  
39      protected final double[] longleyData = new double[] {
40              60323,83.0,234289,2356,1590,107608,1947,
41              61122,88.5,259426,2325,1456,108632,1948,
42              60171,88.2,258054,3682,1616,109773,1949,
43              61187,89.5,284599,3351,1650,110929,1950,
44              63221,96.2,328975,2099,3099,112075,1951,
45              63639,98.1,346999,1932,3594,113270,1952,
46              64989,99.0,365385,1870,3547,115094,1953,
47              63761,100.0,363112,3578,3350,116219,1954,
48              66019,101.2,397469,2904,3048,117388,1955,
49              67857,104.6,419180,2822,2857,118734,1956,
50              68169,108.4,442769,2936,2798,120445,1957,
51              66513,110.8,444546,4681,2637,121950,1958,
52              68655,112.6,482704,3813,2552,123366,1959,
53              69564,114.2,502601,3931,2514,125368,1960,
54              69331,115.7,518173,4806,2572,127852,1961,
55              70551,116.9,554894,4007,2827,130081,1962
56          };
57  
58      protected final double[] swissData = new double[] {
59              80.2,17.0,15,12,9.96,
60              83.1,45.1,6,9,84.84,
61              92.5,39.7,5,5,93.40,
62              85.8,36.5,12,7,33.77,
63              76.9,43.5,17,15,5.16,
64              76.1,35.3,9,7,90.57,
65              83.8,70.2,16,7,92.85,
66              92.4,67.8,14,8,97.16,
67              82.4,53.3,12,7,97.67,
68              82.9,45.2,16,13,91.38,
69              87.1,64.5,14,6,98.61,
70              64.1,62.0,21,12,8.52,
71              66.9,67.5,14,7,2.27,
72              68.9,60.7,19,12,4.43,
73              61.7,69.3,22,5,2.82,
74              68.3,72.6,18,2,24.20,
75              71.7,34.0,17,8,3.30,
76              55.7,19.4,26,28,12.11,
77              54.3,15.2,31,20,2.15,
78              65.1,73.0,19,9,2.84,
79              65.5,59.8,22,10,5.23,
80              65.0,55.1,14,3,4.52,
81              56.6,50.9,22,12,15.14,
82              57.4,54.1,20,6,4.20,
83              72.5,71.2,12,1,2.40,
84              74.2,58.1,14,8,5.23,
85              72.0,63.5,6,3,2.56,
86              60.5,60.8,16,10,7.72,
87              58.3,26.8,25,19,18.46,
88              65.4,49.5,15,8,6.10,
89              75.5,85.9,3,2,99.71,
90              69.3,84.9,7,6,99.68,
91              77.3,89.7,5,2,100.00,
92              70.5,78.2,12,6,98.96,
93              79.4,64.9,7,3,98.22,
94              65.0,75.9,9,9,99.06,
95              92.2,84.6,3,3,99.46,
96              79.3,63.1,13,13,96.83,
97              70.4,38.4,26,12,5.62,
98              65.7,7.7,29,11,13.79,
99              72.7,16.7,22,13,11.22,
100             64.4,17.6,35,32,16.92,
101             77.6,37.6,15,7,4.97,
102             67.6,18.7,25,7,8.65,
103             35.0,1.2,37,53,42.34,
104             44.7,46.6,16,29,50.43,
105             42.8,27.7,22,29,58.33
106         };
107 
108 
109     /**
110      * Test Longley dataset against R.
111      */
112     @Test
113     void testLongly() {
114         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
115         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
116         RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
117         double[] rData = new double[] {
118                 1.000000000000000, 0.9708985250610560, 0.9835516111796693, 0.5024980838759942,
119                 0.4573073999764817, 0.960390571594376, 0.9713294591921188,
120                 0.970898525061056, 1.0000000000000000, 0.9915891780247822, 0.6206333925590966,
121                 0.4647441876006747, 0.979163432977498, 0.9911491900672053,
122                 0.983551611179669, 0.9915891780247822, 1.0000000000000000, 0.6042609398895580,
123                 0.4464367918926265, 0.991090069458478, 0.9952734837647849,
124                 0.502498083875994, 0.6206333925590966, 0.6042609398895580, 1.0000000000000000,
125                 -0.1774206295018783, 0.686551516365312, 0.6682566045621746,
126                 0.457307399976482, 0.4647441876006747, 0.4464367918926265, -0.1774206295018783,
127                 1.0000000000000000, 0.364416267189032, 0.4172451498349454,
128                 0.960390571594376, 0.9791634329774981, 0.9910900694584777, 0.6865515163653120,
129                 0.3644162671890320, 1.000000000000000, 0.9939528462329257,
130                 0.971329459192119, 0.9911491900672053, 0.9952734837647849, 0.6682566045621746,
131                 0.4172451498349454, 0.993952846232926, 1.0000000000000000
132         };
133         UnitTestUtils.customAssertEquals("correlation matrix", createRealMatrix(rData, 7, 7), correlationMatrix, 10E-15);
134 
135         double[] rPvalues = new double[] {
136                 4.38904690369668e-10,
137                 8.36353208910623e-12, 7.8159700933611e-14,
138                 0.0472894097790304, 0.01030636128354301, 0.01316878049026582,
139                 0.0749178049642416, 0.06971758330341182, 0.0830166169296545, 0.510948586323452,
140                 3.693245043123738e-09, 4.327782576751815e-11, 1.167954621905665e-13, 0.00331028281967516, 0.1652293725106684,
141                 3.95834476307755e-10, 1.114663916723657e-13, 1.332267629550188e-15, 0.00466039138541463, 0.1078477071581498, 7.771561172376096e-15
142         };
143         RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 7);
144         fillUpper(rPMatrix, 0d);
145         UnitTestUtils.customAssertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
146     }
147 
148     /**
149      * Test R Swiss fertility dataset against R.
150      */
151     @Test
152     void testSwissFertility() {
153          RealMatrix matrix = createRealMatrix(swissData, 47, 5);
154          PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
155          RealMatrix correlationMatrix = corrInstance.getCorrelationMatrix();
156          double[] rData = new double[] {
157                1.0000000000000000, 0.3530791836199747, -0.6458827064572875, -0.6637888570350691,  0.4636847006517939,
158                  0.3530791836199747, 1.0000000000000000,-0.6865422086171366, -0.6395225189483201, 0.4010950530487398,
159                 -0.6458827064572875, -0.6865422086171366, 1.0000000000000000, 0.6984152962884830, -0.5727418060641666,
160                 -0.6637888570350691, -0.6395225189483201, 0.6984152962884830, 1.0000000000000000, -0.1538589170909148,
161                  0.4636847006517939, 0.4010950530487398, -0.5727418060641666, -0.1538589170909148, 1.0000000000000000
162          };
163          UnitTestUtils.customAssertEquals("correlation matrix", createRealMatrix(rData, 5, 5), correlationMatrix, 10E-15);
164 
165          double[] rPvalues = new double[] {
166                  0.01491720061472623,
167                  9.45043734069043e-07, 9.95151527133974e-08,
168                  3.658616965962355e-07, 1.304590105694471e-06, 4.811397236181847e-08,
169                  0.001028523190118147, 0.005204433539191644, 2.588307925380906e-05, 0.301807756132683
170          };
171          RealMatrix rPMatrix = createLowerTriangularRealMatrix(rPvalues, 5);
172          fillUpper(rPMatrix, 0d);
173          UnitTestUtils.customAssertEquals("correlation p values", rPMatrix, corrInstance.getCorrelationPValues(), 10E-15);
174     }
175 
176     /**
177      * Test p-value near 0. JIRA: MATH-371
178      */
179     @Test
180     void testPValueNearZero() {
181         /*
182          * Create a dataset that has r -> 1, p -> 0 as dimension increases.
183          * Prior to the fix for MATH-371, p vanished for dimension >= 14.
184          * Post fix, p-values diminish smoothly, vanishing at dimension = 127.
185          * Tested value is ~1E-303.
186          */
187         int dimension = 120;
188         double[][] data = new double[dimension][2];
189         for (int i = 0; i < dimension; i++) {
190             data[i][0] = i;
191             data[i][1] = i + 1/((double)i + 1);
192         }
193         PearsonsCorrelation corrInstance = new PearsonsCorrelation(data);
194         assertTrue(corrInstance.getCorrelationPValues().getEntry(0, 1) > 0);
195     }
196 
197 
198     /**
199      * Constant column
200      */
201     @Test
202     void testConstant() {
203         double[] noVariance = new double[] {1, 1, 1, 1};
204         double[] values = new double[] {1, 2, 3, 4};
205         assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(noVariance, values)));
206         assertTrue(Double.isNaN(new PearsonsCorrelation().correlation(values, noVariance)));
207     }
208 
209 
210     /**
211      * Insufficient data
212      */
213 
214     @Test
215     void testInsufficientData() {
216         double[] one = new double[] {1};
217         double[] two = new double[] {2};
218         try {
219             new PearsonsCorrelation().correlation(one, two);
220             fail("Expecting MathIllegalArgumentException");
221         } catch (MathIllegalArgumentException ex) {
222             // Expected
223         }
224         RealMatrix matrix = new BlockRealMatrix(new double[][] {{0},{1}});
225         try {
226             new PearsonsCorrelation(matrix);
227             fail("Expecting MathIllegalArgumentException");
228         } catch (MathIllegalArgumentException ex) {
229             // Expected
230         }
231     }
232 
233     /**
234      * Verify that direct t-tests using standard error estimates are consistent
235      * with reported p-values
236      */
237     @Test
238     void testStdErrorConsistency() {
239         TDistribution tDistribution = new TDistribution(45);
240         RealMatrix matrix = createRealMatrix(swissData, 47, 5);
241         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
242         RealMatrix rValues = corrInstance.getCorrelationMatrix();
243         RealMatrix pValues = corrInstance.getCorrelationPValues();
244         RealMatrix stdErrors = corrInstance.getCorrelationStandardErrors();
245         for (int i = 0; i < 5; i++) {
246             for (int j = 0; j < i; j++) {
247                 double t = FastMath.abs(rValues.getEntry(i, j)) / stdErrors.getEntry(i, j);
248                 double p = 2 * (1 - tDistribution.cumulativeProbability(t));
249                 assertEquals(p, pValues.getEntry(i, j), 10E-15);
250             }
251         }
252     }
253 
254     /**
255      * Verify that creating correlation from covariance gives same results as
256      * direct computation from the original matrix
257      */
258     @Test
259     void testCovarianceConsistency() {
260         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
261         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
262         Covariance covInstance = new Covariance(matrix);
263         PearsonsCorrelation corrFromCovInstance = new PearsonsCorrelation(covInstance);
264         UnitTestUtils.customAssertEquals("correlation values", corrInstance.getCorrelationMatrix(),
265                                          corrFromCovInstance.getCorrelationMatrix(), 10E-15);
266         UnitTestUtils.customAssertEquals("p values", corrInstance.getCorrelationPValues(),
267                                          corrFromCovInstance.getCorrelationPValues(), 10E-15);
268         UnitTestUtils.customAssertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
269                                          corrFromCovInstance.getCorrelationStandardErrors(), 10E-15);
270 
271         PearsonsCorrelation corrFromCovInstance2 =
272             new PearsonsCorrelation(covInstance.getCovarianceMatrix(), 16);
273         UnitTestUtils.customAssertEquals("correlation values", corrInstance.getCorrelationMatrix(),
274                                          corrFromCovInstance2.getCorrelationMatrix(), 10E-15);
275         UnitTestUtils.customAssertEquals("p values", corrInstance.getCorrelationPValues(),
276                                          corrFromCovInstance2.getCorrelationPValues(), 10E-15);
277         UnitTestUtils.customAssertEquals("standard errors", corrInstance.getCorrelationStandardErrors(),
278                                          corrFromCovInstance2.getCorrelationStandardErrors(), 10E-15);
279     }
280 
281 
282     @Test
283     void testConsistency() {
284         RealMatrix matrix = createRealMatrix(longleyData, 16, 7);
285         PearsonsCorrelation corrInstance = new PearsonsCorrelation(matrix);
286         double[][] data = matrix.getData();
287         double[] x = matrix.getColumn(0);
288         double[] y = matrix.getColumn(1);
289         assertEquals(new PearsonsCorrelation().correlation(x, y),
290                 corrInstance.getCorrelationMatrix().getEntry(0, 1), Double.MIN_VALUE);
291         UnitTestUtils.customAssertEquals("Correlation matrix", corrInstance.getCorrelationMatrix(),
292                                          new PearsonsCorrelation().computeCorrelationMatrix(data), Double.MIN_VALUE);
293     }
294 
295     protected RealMatrix createRealMatrix(double[] data, int nRows, int nCols) {
296         double[][] matrixData = new double[nRows][nCols];
297         int ptr = 0;
298         for (int i = 0; i < nRows; i++) {
299             System.arraycopy(data, ptr, matrixData[i], 0, nCols);
300             ptr += nCols;
301         }
302         return new BlockRealMatrix(matrixData);
303     }
304 
305     protected RealMatrix createLowerTriangularRealMatrix(double[] data, int dimension) {
306         int ptr = 0;
307         RealMatrix result = new BlockRealMatrix(dimension, dimension);
308         for (int i = 1; i < dimension; i++) {
309             for (int j = 0; j < i; j++) {
310                 result.setEntry(i, j, data[ptr]);
311                 ptr++;
312             }
313         }
314         return result;
315     }
316 
317     protected void fillUpper(RealMatrix matrix, double diagonalValue) {
318         int dimension = matrix.getColumnDimension();
319         for (int i = 0; i < dimension; i++) {
320             matrix.setEntry(i, i, diagonalValue);
321             for (int j = i+1; j < dimension; j++) {
322                 matrix.setEntry(i, j, matrix.getEntry(j, i));
323             }
324         }
325     }
326 }