View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.stat.descriptive;
24  
25  import java.util.ArrayList;
26  import java.util.Collection;
27  
28  import org.hipparchus.UnitTestUtils;
29  import org.hipparchus.distribution.RealDistribution;
30  import org.hipparchus.distribution.continuous.UniformRealDistribution;
31  import org.hipparchus.random.RandomDataGenerator;
32  import org.junit.Assert;
33  import org.junit.Test;
34  
35  
36  /**
37   * Test cases for {@link StatisticalSummary}.
38   */
39  public class StatisticalSummaryTest {
40  
41      /**
42       * Test aggregate function by randomly generating a dataset of 10-100 values
43       * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
44       * partition and comparing the result of aggregate(...) applied to the collection
45       * of per-partition SummaryStatistics with a single SummaryStatistics computed
46       * over the full sample.
47       */
48      @Test
49      public void testAggregate() {
50  
51          // Generate a random sample and random partition
52          double[] totalSample = generateSample();
53          double[][] subSamples = generatePartition(totalSample);
54          int nSamples = subSamples.length;
55  
56          // Compute combined stats directly
57          StreamingStatistics totalStats = new StreamingStatistics();
58          for (int i = 0; i < totalSample.length; i++) {
59              totalStats.addValue(totalSample[i]);
60          }
61  
62          // Now compute subsample stats individually and aggregate
63          StreamingStatistics[] subSampleStats = new StreamingStatistics[nSamples];
64          for (int i = 0; i < nSamples; i++) {
65              subSampleStats[i] = new StreamingStatistics();
66          }
67          Collection<StreamingStatistics> aggregate = new ArrayList<StreamingStatistics>();
68          for (int i = 0; i < nSamples; i++) {
69              for (int j = 0; j < subSamples[i].length; j++) {
70                  subSampleStats[i].addValue(subSamples[i][j]);
71              }
72              aggregate.add(subSampleStats[i]);
73          }
74  
75          // Compare values
76          StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
77          assertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
78      }
79  
80      /**
81       * Similar to {@link #testAggregate()} but operating on
82       * {@link StatisticalSummary} instead.
83       */
84      @Test
85      public void testAggregateStatisticalSummary() {
86  
87          // Generate a random sample and random partition
88          double[] totalSample = generateSample();
89          double[][] subSamples = generatePartition(totalSample);
90          int nSamples = subSamples.length;
91  
92          // Compute combined stats directly
93          StreamingStatistics totalStats = new StreamingStatistics();
94          for (int i = 0; i < totalSample.length; i++) {
95              totalStats.addValue(totalSample[i]);
96          }
97  
98          // Now compute subsample stats individually and aggregate
99          StreamingStatistics[] subSampleStats = new StreamingStatistics[nSamples];
100         for (int i = 0; i < nSamples; i++) {
101             subSampleStats[i] = new StreamingStatistics();
102         }
103         Collection<StatisticalSummary> aggregate = new ArrayList<StatisticalSummary>();
104         for (int i = 0; i < nSamples; i++) {
105             for (int j = 0; j < subSamples[i].length; j++) {
106                 subSampleStats[i].addValue(subSamples[i][j]);
107             }
108             aggregate.add(subSampleStats[i].getSummary());
109         }
110 
111         // Compare values
112         StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
113         assertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
114     }
115 
116     @Test
117     public void testAggregateDegenerate() {
118         double[] totalSample = {1, 2, 3, 4, 5};
119         double[][] subSamples = {{1}, {2}, {3}, {4}, {5}};
120 
121         // Compute combined stats directly
122         StreamingStatistics totalStats = new StreamingStatistics();
123         for (int i = 0; i < totalSample.length; i++) {
124             totalStats.addValue(totalSample[i]);
125         }
126 
127         // Now compute subsample stats individually and aggregate
128         StreamingStatistics[] subSampleStats = new StreamingStatistics[5];
129         for (int i = 0; i < 5; i++) {
130             subSampleStats[i] = new StreamingStatistics();
131         }
132         Collection<StreamingStatistics> aggregate = new ArrayList<StreamingStatistics>();
133         for (int i = 0; i < 5; i++) {
134             for (int j = 0; j < subSamples[i].length; j++) {
135                 subSampleStats[i].addValue(subSamples[i][j]);
136             }
137             aggregate.add(subSampleStats[i]);
138         }
139 
140         // Compare values
141         StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
142         assertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
143     }
144 
145     @Test
146     public void testAggregateSpecialValues() {
147         double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5};
148         double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}};
149 
150         // Compute combined stats directly
151         StreamingStatistics totalStats = new StreamingStatistics();
152         for (int i = 0; i < totalSample.length; i++) {
153             totalStats.addValue(totalSample[i]);
154         }
155 
156         // Now compute subsample stats individually and aggregate
157         StreamingStatistics[] subSampleStats = new StreamingStatistics[5];
158         for (int i = 0; i < 4; i++) {
159             subSampleStats[i] = new StreamingStatistics();
160         }
161         Collection<StreamingStatistics> aggregate = new ArrayList<StreamingStatistics>();
162         for (int i = 0; i < 4; i++) {
163             for (int j = 0; j < subSamples[i].length; j++) {
164                 subSampleStats[i].addValue(subSamples[i][j]);
165             }
166             aggregate.add(subSampleStats[i]);
167         }
168 
169         // Compare values
170         StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
171         assertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
172     }
173 
174     /**
175      * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
176      * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
177      * have to agree exactly, delta is used only for sum, mean, variance, std dev.
178      */
179     protected static void assertStatisticalSummaryEquals(StatisticalSummary expected,
180                                                          StatisticalSummary observed,
181                                                          double delta) {
182         UnitTestUtils.assertEquals(expected.getMax(), observed.getMax(), 0);
183         UnitTestUtils.assertEquals(expected.getMin(), observed.getMin(), 0);
184         Assert.assertEquals(expected.getN(), observed.getN());
185         UnitTestUtils.assertEquals(expected.getSum(), observed.getSum(), delta);
186         UnitTestUtils.assertEquals(expected.getMean(), observed.getMean(), delta);
187         UnitTestUtils.assertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta);
188         UnitTestUtils.assertEquals(expected.getVariance(), observed.getVariance(), delta);
189     }
190 
191     /**
192      * Generates a random sample of double values.
193      * Sample size is random, between 10 and 100 and values are
194      * uniformly distributed over [-100, 100].
195      *
196      * @return array of random double values
197      */
198     private double[] generateSample() {
199         final RealDistribution uniformDist = new UniformRealDistribution(-100, 100);
200         final RandomDataGenerator randomDataGenerator = new RandomDataGenerator(100);
201         final int sampleSize = randomDataGenerator.nextInt(10,  100);
202         final double[] out = randomDataGenerator.nextDeviates(uniformDist, sampleSize);
203         return out;
204     }
205 
206     /**
207      * Generates a partition of <sample> into up to 5 sequentially selected
208      * subsamples with randomly selected partition points.
209      *
210      * @param sample array to partition
211      * @return rectangular array with rows = subsamples
212      */
213     private double[][] generatePartition(double[] sample) {
214         final RandomDataGenerator randomDataGenerator = new RandomDataGenerator(100);
215         final int length = sample.length;
216         final double[][] out = new double[5][];
217         int cur = 0;          // beginning of current partition segment
218         int offset = 0;       // end of current partition segment
219         int sampleCount = 0;  // number of segments defined
220         for (int i = 0; i < 5; i++) {
221             if (cur == length || offset == length) {
222                 break;
223             }
224             final int next;
225             if (i == 4 || cur == length - 1) {
226                 next = length - 1;
227             } else {
228                 next = randomDataGenerator.nextInt(cur, length - 1);
229             }
230             final int subLength = next - cur + 1;
231             out[i] = new double[subLength];
232             System.arraycopy(sample, offset, out[i], 0, subLength);
233             cur = next + 1;
234             sampleCount++;
235             offset += subLength;
236         }
237         if (sampleCount < 5) {
238             double[][] out2 = new double[sampleCount][];
239             for (int j = 0; j < sampleCount; j++) {
240                 final int curSize = out[j].length;
241                 out2[j] = new double[curSize];
242                 System.arraycopy(out[j], 0, out2[j], 0, curSize);
243             }
244             return out2;
245         } else {
246             return out;
247         }
248     }
249 
250 }