View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.correlation;
23  
24  import org.hipparchus.exception.LocalizedCoreFormats;
25  import org.hipparchus.exception.MathIllegalArgumentException;
26  import org.hipparchus.linear.BlockRealMatrix;
27  import org.hipparchus.linear.RealMatrix;
28  import org.hipparchus.stat.descriptive.moment.Mean;
29  import org.hipparchus.stat.descriptive.moment.Variance;
30  
31  /**
32   * Computes covariances for pairs of arrays or columns of a matrix.
33   * <p>
34   * The constructors that take {@code RealMatrix} or {@code double[][]}
35   * arguments generate covariance matrices. The columns of the input
36   * matrices are assumed to represent variable values.
37   * <p>
38   * The constructor argument {@code biasCorrected} determines whether or
39   * not computed covariances are bias-corrected.
40   * <p>
41   * Unbiased covariances are given by the formula:
42   * <p>
43   * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
44   * <p>
45   * where {@code E(X)} is the mean of {@code X} and {@code E(Y)}
46   * is the mean of the <code>Y</code> values.
47   * <p>
48   * Non-bias-corrected estimates use {@code n} in place of {@code n - 1}.
49   */
50  public class Covariance {
51  
52      /** The covariance matrix. */
53      private final RealMatrix covarianceMatrix;
54  
55      /** Number of observations (length of covariate vectors). */
56      private final int n;
57  
58      /**
59       * Create a Covariance with no data.
60       */
61      public Covariance() {
62          super();
63          covarianceMatrix = null;
64          n = 0;
65      }
66  
67      /**
68       * Create a Covariance matrix from a rectangular array
69       * whose columns represent covariates.
70       * <p>
71       * The <code>biasCorrected</code> parameter determines whether or not
72       * covariance estimates are bias-corrected.
73       * <p>
74       * The input array must be rectangular with at least one column
75       * and two rows.
76       *
77       * @param data rectangular array with columns representing covariates
78       * @param biasCorrected true means covariances are bias-corrected
79       * @throws MathIllegalArgumentException if the input data array is not
80       * rectangular with at least two rows and one column.
81       * @throws MathIllegalArgumentException if the input data array is not
82       * rectangular with at least one row and one column.
83       */
84      public Covariance(double[][] data, boolean biasCorrected)
85          throws MathIllegalArgumentException {
86          this(new BlockRealMatrix(data), biasCorrected);
87      }
88  
89      /**
90       * Create a Covariance matrix from a rectangular array
91       * whose columns represent covariates.
92       * <p>
93       * The input array must be rectangular with at least one column
94       * and two rows.
95       *
96       * @param data rectangular array with columns representing covariates
97       * @throws MathIllegalArgumentException if the input data array is not
98       * rectangular with at least two rows and one column.
99       * @throws MathIllegalArgumentException if the input data array is not
100      * rectangular with at least one row and one column.
101      */
102     public Covariance(double[][] data) throws MathIllegalArgumentException {
103         this(data, true);
104     }
105 
106     /**
107      * Create a covariance matrix from a matrix whose columns
108      * represent covariates.
109      * <p>
110      * The <code>biasCorrected</code> parameter determines whether or not
111      * covariance estimates are bias-corrected.
112      * <p>
113      * The matrix must have at least one column and two rows.
114      *
115      * @param matrix matrix with columns representing covariates
116      * @param biasCorrected true means covariances are bias-corrected
117      * @throws MathIllegalArgumentException if the input matrix does not have
118      * at least two rows and one column
119      */
120     public Covariance(RealMatrix matrix, boolean biasCorrected)
121         throws MathIllegalArgumentException {
122         checkSufficientData(matrix);
123         n = matrix.getRowDimension();
124         covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
125     }
126 
127     /**
128      * Create a covariance matrix from a matrix whose columns
129      * represent covariates.
130      * <p>
131      * The matrix must have at least one column and two rows.
132      *
133      * @param matrix matrix with columns representing covariates
134      * @throws MathIllegalArgumentException if the input matrix does not have
135      * at least two rows and one column
136      */
137     public Covariance(RealMatrix matrix) throws MathIllegalArgumentException {
138         this(matrix, true);
139     }
140 
141     /**
142      * Returns the covariance matrix
143      *
144      * @return covariance matrix
145      */
146     public RealMatrix getCovarianceMatrix() {
147         return covarianceMatrix;
148     }
149 
150     /**
151      * Returns the number of observations (length of covariate vectors)
152      *
153      * @return number of observations
154      */
155     public int getN() {
156         return n;
157     }
158 
159     /**
160      * Compute a covariance matrix from a matrix whose columns represent covariates.
161      *
162      * @param matrix input matrix (must have at least one column and two rows)
163      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
164      * @return covariance matrix
165      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
166      */
167     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected)
168         throws MathIllegalArgumentException {
169 
170         int dimension = matrix.getColumnDimension();
171         Variance variance = new Variance(biasCorrected);
172         RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
173         for (int i = 0; i < dimension; i++) {
174             for (int j = 0; j < i; j++) {
175               double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
176               outMatrix.setEntry(i, j, cov);
177               outMatrix.setEntry(j, i, cov);
178             }
179             outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
180         }
181         return outMatrix;
182     }
183 
184     /**
185      * Create a covariance matrix from a matrix whose columns represent
186      * covariates. Covariances are computed using the bias-corrected formula.
187      *
188      * @param matrix input matrix (must have at least one column and two rows)
189      * @return covariance matrix
190      * @throws MathIllegalArgumentException if matrix does not contain sufficient data
191      * @see #Covariance
192      */
193     protected RealMatrix computeCovarianceMatrix(RealMatrix matrix)
194         throws MathIllegalArgumentException {
195         return computeCovarianceMatrix(matrix, true);
196     }
197 
198     /**
199      * Compute a covariance matrix from a rectangular array whose columns represent covariates.
200      *
201      * @param data input array (must have at least one column and two rows)
202      * @param biasCorrected determines whether or not covariance estimates are bias-corrected
203      * @return covariance matrix
204      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
205      * @throws MathIllegalArgumentException if the input data array is not
206      * rectangular with at least one row and one column.
207      */
208     protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected)
209         throws MathIllegalArgumentException {
210         return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
211     }
212 
213     /**
214      * Create a covariance matrix from a rectangular array whose columns represent
215      * covariates. Covariances are computed using the bias-corrected formula.
216      *
217      * @param data input array (must have at least one column and two rows)
218      * @return covariance matrix
219      * @throws MathIllegalArgumentException if the data array does not contain sufficient data
220      * @throws MathIllegalArgumentException if the input data array is not
221      * rectangular with at least one row and one column.
222      * @see #Covariance
223      */
224     protected RealMatrix computeCovarianceMatrix(double[][] data)
225         throws MathIllegalArgumentException {
226         return computeCovarianceMatrix(data, true);
227     }
228 
229     /**
230      * Computes the covariance between the two arrays.
231      * <p>
232      * Array lengths must match and the common length must be at least 2.
233      *
234      * @param xArray first data array
235      * @param yArray second data array
236      * @param biasCorrected if true, returned value will be bias-corrected
237      * @return returns the covariance for the two arrays
238      * @throws  MathIllegalArgumentException if the arrays lengths do not match or
239      * there is insufficient data
240      */
241     public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
242         throws MathIllegalArgumentException {
243         Mean mean = new Mean();
244         double result = 0d;
245         int length = xArray.length;
246         if (length != yArray.length) {
247             throw new MathIllegalArgumentException(
248                   LocalizedCoreFormats.DIMENSIONS_MISMATCH, length, yArray.length);
249         } else if (length < 2) {
250             throw new MathIllegalArgumentException(
251                   LocalizedCoreFormats.INSUFFICIENT_OBSERVED_POINTS_IN_SAMPLE, length, 2);
252         } else {
253             double xMean = mean.evaluate(xArray);
254             double yMean = mean.evaluate(yArray);
255             for (int i = 0; i < length; i++) {
256                 double xDev = xArray[i] - xMean;
257                 double yDev = yArray[i] - yMean;
258                 result += (xDev * yDev - result) / (i + 1);
259             }
260         }
261         return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
262     }
263 
264     /**
265      * Computes the covariance between the two arrays, using the bias-corrected
266      * formula.
267      * <p>
268      * Array lengths must match and the common length must be at least 2.
269      *
270      * @param xArray first data array
271      * @param yArray second data array
272      * @return returns the covariance for the two arrays
273      * @throws MathIllegalArgumentException if the arrays lengths do not match or
274      * there is insufficient data
275      */
276     public double covariance(final double[] xArray, final double[] yArray)
277         throws MathIllegalArgumentException {
278         return covariance(xArray, yArray, true);
279     }
280 
281     /**
282      * Throws MathIllegalArgumentException if the matrix does not have at least
283      * one column and two rows.
284      *
285      * @param matrix matrix to check
286      * @throws MathIllegalArgumentException if the matrix does not contain sufficient data
287      * to compute covariance
288      */
289     private void checkSufficientData(final RealMatrix matrix) throws MathIllegalArgumentException {
290         int nRows = matrix.getRowDimension();
291         int nCols = matrix.getColumnDimension();
292         if (nRows < 2 || nCols < 1) {
293             throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
294                                                    nRows, nCols);
295         }
296     }
297 }