1 /* 2 * Licensed to the Hipparchus project under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The Hipparchus project licenses this file to You under the Apache License, 6 * Version 2.0 (the "License"); you may not use this file except in compliance 7 * with the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.hipparchus.stat.projection; 18 19 import org.hipparchus.exception.MathIllegalStateException; 20 import org.hipparchus.stat.LocalizedStatFormats; 21 import org.junit.jupiter.api.Test; 22 23 import static org.junit.jupiter.api.Assertions.assertArrayEquals; 24 import static org.junit.jupiter.api.Assertions.assertEquals; 25 import static org.junit.jupiter.api.Assertions.assertFalse; 26 import static org.junit.jupiter.api.Assertions.assertTrue; 27 import static org.junit.jupiter.api.Assertions.fail; 28 29 public class PCATest { 30 31 // example from: 32 // https://towardsdatascience.com/the-mathematics-behind-principal-component-analysis-fff2d7f4b643 33 // https://stattrek.com/matrix-algebra/covariance-matrix 34 private static final double[][] SCORES = { 35 {90, 60, 90}, 36 {90, 90, 30}, 37 {60, 60, 60}, 38 {60, 60, 90}, 39 {30, 30, 30}, 40 }; 41 42 public static final double[] EXPECTED_MEAN = {66, 60, 60}; 43 public static final double[] EXPECTED_VARIANCE = {1137.587441, 786.387983, 56.024575}; 44 45 private static final double[][] EXPECTED_COMPONENTS = { 46 { 0.6558023, 0.385999 }, 47 { 0.4291978, 0.516366 }, 48 { 0.6210577, -0.7644414 } 49 }; 50 51 /** 52 * This is the expected value (give or take sign) when centering (covariance) 53 * but no scaling is applied. In general, components with the same values but 54 * differing sign are 55 * <a href="https://stats.stackexchange.com/questions/30348/is-it-acceptable-to-reverse-a-sign-of-a-principal-component-score">equivalent</a>. 56 * 57 * The result has been cross-checked with 58 * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a> 59 * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html>weka.attributeSelection.PrincipalComponents</a> 60 * (with the <code>centerData</code> option set to <code>true</code>) 61 * <a href="https://github.com/datumbox/datumbox-framework/blob/develop/datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/featureselection/PCA.java">com.datumbox.framework.core.machinelearning.featureselection.PCA</a> 62 * (but for whatever reason datumbox does the transform on the unnormalized original data rather than normalized data - normalizing manually achieves the result below) 63 * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a> 64 * (using the PCA.fit method) 65 * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a> 66 */ 67 private static final double[][] EXPECTED_COV = { 68 { 34.3709848, -13.6692708 }, 69 { 9.9834573, 47.6882055 }, 70 { -3.9348135, -2.3159927 }, 71 { 14.6969171, -25.2492347 }, 72 { -55.1165457, -6.4537072 }, 73 }; 74 75 /** 76 * This is the expected value give or take sign when centering and scaling (correlation). 77 * 78 * The result has been cross-checked with 79 * <a href="https://javadoc.io/static/nz.ac.waikato.cms.weka/weka-dev/3.9.4/weka/attributeSelection/PrincipalComponents.html></a> 80 * <a href="https://au.mathworks.com/help/stats/pca.html">pca from matlab</a> 81 * (using the 'VariableWeights','variance' option) 82 */ 83 private static final double[][] EXPECTED_COR = { 84 { 0.9118256, -0.942809 }, 85 { 1.3832302, 1.4142136 }, 86 { -0.1690309, 0.0 }, 87 { 0.0666714, -0.942809 }, 88 { -2.1926964, 0.4714045 }, 89 }; 90 91 /** 92 * This is the expected value give or take sign when centering and scaling (correlation) 93 * is applied with no bias adjustment. In general, bias adjustment is more accurate but 94 * alters most PCA machine learning models by an insignificant amount so is often not accounted for. 95 * 96 * The result has been cross-checked with 97 * <a href="https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html">sklearn.decomposition.PCA</a> 98 * (but with prior preprocessing using <a href="https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html">sklearn.preprocessing.StandardScaler</a>) 99 * <a href="https://www.javadoc.io/doc/com.github.haifengl/smile-core/latest/smile/feature/extraction/PCA.html">smile.feature.extraction.PCA</a> 100 * (using the PCA.cor method) 101 */ 102 public static final double[][] EXPECTED_COR_NO_BIAS = { 103 { 1.0194521, -1.0540926 }, 104 { 1.5464984, 1.5811388 }, 105 { -0.1889822, 0.0 }, 106 { 0.0745409, -1.0540926 }, 107 { -2.451509, 0.5270463 }, 108 }; 109 public static final double DELTA = 0.000001; 110 111 @Test 112 void defaultSettings() { 113 PCA pca = new PCA(2); 114 assertEquals(2, pca.getNumComponents()); 115 assertFalse(pca.isScale()); 116 assertTrue(pca.isBiasCorrection()); 117 } 118 119 @Test 120 void covariance() { 121 PCA pca = new PCA(2); 122 double[][] actual = pca.fitAndTransform(SCORES); 123 assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA); 124 assertArrayEquals(EXPECTED_VARIANCE, pca.getVariance(), DELTA); 125 customAssertExpected(EXPECTED_COMPONENTS, pca.getComponents()); 126 customAssertExpected(EXPECTED_COV, actual); 127 128 // calling fit and transform individually should be the same as combo method 129 pca = new PCA(2); 130 actual = pca.fit(SCORES).transform(SCORES); 131 assertArrayEquals(EXPECTED_MEAN, pca.getCenter(), DELTA); 132 customAssertExpected(EXPECTED_COV, actual); 133 } 134 135 @Test 136 void correlation() { 137 PCA pca = new PCA(2, true, true); 138 double[][] actual = pca.fitAndTransform(SCORES); 139 customAssertExpected(EXPECTED_COR, actual); 140 } 141 142 @Test 143 void correlationNoBias() { 144 PCA pca = new PCA(2, true, false); 145 double[][] actual = pca.fitAndTransform(SCORES); 146 customAssertExpected(EXPECTED_COR_NO_BIAS, actual); 147 } 148 149 @Test 150 void transformWithoutFit() { 151 PCA pca = new PCA(2); 152 try { 153 pca.transform(SCORES); 154 fail("an exception should have been thrown"); 155 } catch (MathIllegalStateException mise) { 156 assertEquals(LocalizedStatFormats.ILLEGAL_STATE_PCA, mise.getSpecifier()); 157 assertEquals("transform", mise.getParts()[0]); 158 } 159 } 160 161 private static void customAssertExpected(double[][] expected, double[][] actual) { 162 for (int i = 0; i < expected.length; i++) { 163 double[] e = expected[i]; 164 double[] t = actual[i]; 165 assertArrayEquals(e, t, DELTA); 166 } 167 } 168 }