View Javadoc
1   /*
2    * Licensed to the Hipparchus project under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The Hipparchus project licenses this file to You under the Apache License,
6    * Version 2.0 (the "License"); you may not use this file except in compliance
7    * with the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.hipparchus.stat.projection;
18  
19  import org.hipparchus.exception.MathIllegalStateException;
20  import org.hipparchus.stat.LocalizedStatFormats;
21  import org.junit.jupiter.api.Test;
22  
23  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
24  import static org.junit.jupiter.api.Assertions.assertEquals;
25  import static org.junit.jupiter.api.Assertions.assertFalse;
26  import static org.junit.jupiter.api.Assertions.assertTrue;
27  import static org.junit.jupiter.api.Assertions.fail;
28  
29  public class PCATest {
30  
31      // example from:
32      // https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643
33      // https://stattrek.com/matrix-algebra/covariance-matrix
34      private static final double[][] SCORES = {
35              {90, 60, 90},
36              {90, 90, 30},
37              {60, 60, 60},
38              {60, 60, 90},
39              {30, 30, 30},
40      };
41  
42      public static final double[] EXPECTED_MEAN = {66, 60, 60};
43      public static final double[] EXPECTED_VARIANCE = {1137.587441, 786.387983, 56.024575};
44  
45      private static final double[][] EXPECTED_COMPONENTS = {
46              {  0.6558023,  0.385999 },
47              {  0.4291978,  0.516366 },
48              {  0.6210577, -0.7644414 }
49      };
50  
51      /**
52       * This is the expected value (give or take sign) when centering (covariance)
53       * but no scaling is applied. In general, components with the same values but
54       * differing sign are
55       * <a href="https://stats.stackexchange.com/questions/30348/is-it-acceptable-to-reverse-a-sign-of-a-principal-component-score">equivalent</a>.
56       *
57       * The result has been cross-checked with
58       * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a>
59       * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html>weka.attributeSelection.PrincipalComponents</a>
60       * (with the <code>centerData</code> option set to <code>true</code>)
61       * <a href="https://github.com/datumbox/datumbox-framework/blob/develop/datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/featureselection/PCA.java">com.datumbox.framework.core.machinelearning.featureselection.PCA</a>
62       * (but for whatever reason datumbox does the transform on the unnormalized original data rather than normalized data - normalizing manually achieves the result below)
63       * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a>
64       * (using the PCA.fit method)
65       * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a>
66       */
67      private static final double[][] EXPECTED_COV = {
68              {  34.3709848, -13.6692708 },
69              {   9.9834573,  47.6882055 },
70              {  -3.9348135,  -2.3159927 },
71              {  14.6969171, -25.2492347 },
72              { -55.1165457,  -6.4537072 },
73      };
74  
75      /**
76       * This is the expected value give or take sign when centering and scaling (correlation).
77       *
78       * The result has been cross-checked with
79       * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html></a>
80       * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a>
81       * (using the 'VariableWeights','variance' option)
82       */
83      private static final double[][] EXPECTED_COR = {
84              {  0.9118256, -0.942809  },
85              {  1.3832302,  1.4142136 },
86              { -0.1690309,  0.0       },
87              {  0.0666714, -0.942809  },
88              { -2.1926964,  0.4714045 },
89      };
90  
91      /**
92       * This is the expected value give or take sign when centering and scaling (correlation)
93       * is applied with no bias adjustment. In general, bias adjustment is more accurate but
94       * alters most PCA machine learning models by an insignificant amount so is often not accounted for.
95       *
96       * The result has been cross-checked with
97       * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a>
98       * (but with prior preprocessing using <a href="https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html">sklearn.preprocessing.StandardScaler</a>)
99       * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a>
100      * (using the PCA.cor method)
101      */
102     public static final double[][] EXPECTED_COR_NO_BIAS = {
103             {  1.0194521, -1.0540926 },
104             {  1.5464984,  1.5811388 },
105             { -0.1889822,  0.0       },
106             {  0.0745409, -1.0540926 },
107             { -2.451509,   0.5270463 },
108     };
109     public static final double DELTA = 0.000001;
110 
111     @Test
112     void defaultSettings() {
113         PCA pca = new PCA(2);
114         assertEquals(2, pca.getNumComponents());
115         assertFalse(pca.isScale());
116         assertTrue(pca.isBiasCorrection());
117     }
118 
119     @Test
120     void covariance() {
121         PCA pca = new PCA(2);
122         double[][] actual = pca.fitAndTransform(SCORES);
123         assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA);
124         assertArrayEquals(EXPECTED_VARIANCE, pca.getVariance(), DELTA);
125         customAssertExpected(EXPECTED_COMPONENTS, pca.getComponents());
126         customAssertExpected(EXPECTED_COV, actual);
127 
128         // calling fit and transform individually should be the same as combo method
129         pca = new PCA(2);
130         actual = pca.fit(SCORES).transform(SCORES);
131         assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA);
132         customAssertExpected(EXPECTED_COV, actual);
133     }
134 
135     @Test
136     void correlation() {
137         PCA pca = new PCA(2, true, true);
138         double[][] actual = pca.fitAndTransform(SCORES);
139         customAssertExpected(EXPECTED_COR, actual);
140     }
141 
142     @Test
143     void correlationNoBias() {
144         PCA pca = new PCA(2, true, false);
145         double[][] actual = pca.fitAndTransform(SCORES);
146         customAssertExpected(EXPECTED_COR_NO_BIAS, actual);
147     }
148 
149     @Test
150     void transformWithoutFit() {
151         PCA pca = new PCA(2);
152         try {
153             pca.transform(SCORES);
154             fail("an exception should have been thrown");
155         } catch (MathIllegalStateException mise) {
156             assertEquals(LocalizedStatFormats.ILLEGAL_STATE_PCA, mise.getSpecifier());
157             assertEquals("transform", mise.getParts()[0]);
158         }
159     }
160 
161     private static void customAssertExpected(double[][] expected, double[][] actual) {
162         for (int i = 0; i < expected.length; i++) {
163             double[] e = expected[i];
164             double[] t = actual[i];
165             assertArrayEquals(e, t, DELTA);
166         }
167     }
168 }