View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.correlation;
23  
24  import org.hipparchus.exception.LocalizedCoreFormats;
25  import org.hipparchus.exception.MathIllegalArgumentException;
26  
27  /**
28   * Bivariate Covariance implementation that does not require input data to be
29   * stored in memory.
30   * <p>
31   * This class is based on a paper written by Philippe P&eacute;bay:
32   * <a href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
33   * Formulas for Robust, One-Pass Parallel Computation of Covariances and
34   * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report SAND2008-6212,
35   * Sandia National Laboratories. It computes the covariance for a pair of variables.
36   * Use {@link StorelessCovariance} to estimate an entire covariance matrix.
37   * <p>
38   * Note: This class is package private as it is only used internally in
39   * the {@link StorelessCovariance} class.
40   */
41  class StorelessBivariateCovariance {
42  
43      /** the mean of variable x */
44      private double meanX;
45  
46      /** the mean of variable y */
47      private double meanY;
48  
49      /** number of observations */
50      private double n;
51  
52      /** the running covariance estimate */
53      private double covarianceNumerator;
54  
55      /** flag for bias correction */
56      private boolean biasCorrected;
57  
58      /**
59       * Create an empty {@link StorelessBivariateCovariance} instance with
60       * bias correction.
61       */
62      StorelessBivariateCovariance() {
63          this(true);
64      }
65  
66      /**
67       * Create an empty {@link StorelessBivariateCovariance} instance.
68       *
69       * @param biasCorrection if <code>true</code> the covariance estimate is corrected
70       * for bias, i.e. n-1 in the denominator, otherwise there is no bias correction,
71       * i.e. n in the denominator.
72       */
73      StorelessBivariateCovariance(final boolean biasCorrection) {
74          meanX = meanY = 0.0;
75          n = 0;
76          covarianceNumerator = 0.0;
77          biasCorrected = biasCorrection;
78      }
79  
80      /**
81       * Update the covariance estimation with a pair of variables (x, y).
82       *
83       * @param x the x value
84       * @param y the y value
85       */
86      public void increment(final double x, final double y) {
87          n++;
88          final double deltaX = x - meanX;
89          final double deltaY = y - meanY;
90          meanX += deltaX / n;
91          meanY += deltaY / n;
92          covarianceNumerator += ((n - 1.0) / n) * deltaX * deltaY;
93      }
94  
95      /**
96       * Appends another bivariate covariance calculation to this.
97       * After this operation, statistics returned should be close to what would
98       * have been obtained by by performing all of the {@link #increment(double, double)}
99       * operations in {@code cov} directly on this.
100      *
101      * @param cov StorelessBivariateCovariance instance to append.
102      */
103     public void append(StorelessBivariateCovariance cov) {
104         double oldN = n;
105         n += cov.n;
106         final double deltaX = cov.meanX - meanX;
107         final double deltaY = cov.meanY - meanY;
108         meanX += deltaX * cov.n / n;
109         meanY += deltaY * cov.n / n;
110         covarianceNumerator += cov.covarianceNumerator + oldN * cov.n / n * deltaX * deltaY;
111     }
112 
113     /**
114      * Returns the number of observations.
115      *
116      * @return number of observations
117      */
118     public double getN() {
119         return n;
120     }
121 
122     /**
123      * Return the current covariance estimate.
124      *
125      * @return the current covariance
126      * @throws MathIllegalArgumentException if the number of observations
127      * is &lt; 2
128      */
129     public double getResult() throws MathIllegalArgumentException {
130         if (n < 2) {
131             throw new MathIllegalArgumentException(LocalizedCoreFormats.INSUFFICIENT_DIMENSION,
132                                                    n, 2, true);
133         }
134         if (biasCorrected) {
135             return covarianceNumerator / (n - 1d);
136         } else {
137             return covarianceNumerator / n;
138         }
139     }
140 }
141