View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.stat.descriptive;
24  
25  import org.hipparchus.UnitTestUtils;
26  import org.hipparchus.distribution.RealDistribution;
27  import org.hipparchus.distribution.continuous.UniformRealDistribution;
28  import org.hipparchus.random.RandomDataGenerator;
29  import org.junit.jupiter.api.Test;
30  
31  import java.util.ArrayList;
32  import java.util.Collection;
33  
34  import static org.junit.jupiter.api.Assertions.assertEquals;
35  
36  
37  /**
38   * Test cases for {@link StatisticalSummary}.
39   */
40  class StatisticalSummaryTest {
41  
42      /**
43       * Test aggregate function by randomly generating a dataset of 10-100 values
44       * from [-100, 100], dividing it into 2-5 partitions, computing stats for each
45       * partition and comparing the result of aggregate(...) applied to the collection
46       * of per-partition SummaryStatistics with a single SummaryStatistics computed
47       * over the full sample.
48       */
49      @Test
50      void testAggregate() {
51  
52          // Generate a random sample and random partition
53          double[] totalSample = generateSample();
54          double[][] subSamples = generatePartition(totalSample);
55          int nSamples = subSamples.length;
56  
57          // Compute combined stats directly
58          StreamingStatistics totalStats = new StreamingStatistics();
59          for (int i = 0; i < totalSample.length; i++) {
60              totalStats.addValue(totalSample[i]);
61          }
62  
63          // Now compute subsample stats individually and aggregate
64          StreamingStatistics[] subSampleStats = new StreamingStatistics[nSamples];
65          for (int i = 0; i < nSamples; i++) {
66              subSampleStats[i] = new StreamingStatistics();
67          }
68          Collection<StreamingStatistics> aggregate = new ArrayList<StreamingStatistics>();
69          for (int i = 0; i < nSamples; i++) {
70              for (int j = 0; j < subSamples[i].length; j++) {
71                  subSampleStats[i].addValue(subSamples[i][j]);
72              }
73              aggregate.add(subSampleStats[i]);
74          }
75  
76          // Compare values
77          StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
78          customAssertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
79      }
80  
81      /**
82       * Similar to {@link #testAggregate()} but operating on
83       * {@link StatisticalSummary} instead.
84       */
85      @Test
86      void testAggregateStatisticalSummary() {
87  
88          // Generate a random sample and random partition
89          double[] totalSample = generateSample();
90          double[][] subSamples = generatePartition(totalSample);
91          int nSamples = subSamples.length;
92  
93          // Compute combined stats directly
94          StreamingStatistics totalStats = new StreamingStatistics();
95          for (int i = 0; i < totalSample.length; i++) {
96              totalStats.addValue(totalSample[i]);
97          }
98  
99          // Now compute subsample stats individually and aggregate
100         StreamingStatistics[] subSampleStats = new StreamingStatistics[nSamples];
101         for (int i = 0; i < nSamples; i++) {
102             subSampleStats[i] = new StreamingStatistics();
103         }
104         Collection<StatisticalSummary> aggregate = new ArrayList<StatisticalSummary>();
105         for (int i = 0; i < nSamples; i++) {
106             for (int j = 0; j < subSamples[i].length; j++) {
107                 subSampleStats[i].addValue(subSamples[i][j]);
108             }
109             aggregate.add(subSampleStats[i].getSummary());
110         }
111 
112         // Compare values
113         StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
114         customAssertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
115     }
116 
117     @Test
118     void testAggregateDegenerate() {
119         double[] totalSample = {1, 2, 3, 4, 5};
120         double[][] subSamples = {{1}, {2}, {3}, {4}, {5}};
121 
122         // Compute combined stats directly
123         StreamingStatistics totalStats = new StreamingStatistics();
124         for (int i = 0; i < totalSample.length; i++) {
125             totalStats.addValue(totalSample[i]);
126         }
127 
128         // Now compute subsample stats individually and aggregate
129         StreamingStatistics[] subSampleStats = new StreamingStatistics[5];
130         for (int i = 0; i < 5; i++) {
131             subSampleStats[i] = new StreamingStatistics();
132         }
133         Collection<StreamingStatistics> aggregate = new ArrayList<StreamingStatistics>();
134         for (int i = 0; i < 5; i++) {
135             for (int j = 0; j < subSamples[i].length; j++) {
136                 subSampleStats[i].addValue(subSamples[i][j]);
137             }
138             aggregate.add(subSampleStats[i]);
139         }
140 
141         // Compare values
142         StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
143         customAssertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
144     }
145 
146     @Test
147     void testAggregateSpecialValues() {
148         double[] totalSample = {Double.POSITIVE_INFINITY, 2, 3, Double.NaN, 5};
149         double[][] subSamples = {{Double.POSITIVE_INFINITY, 2}, {3}, {Double.NaN}, {5}};
150 
151         // Compute combined stats directly
152         StreamingStatistics totalStats = new StreamingStatistics();
153         for (int i = 0; i < totalSample.length; i++) {
154             totalStats.addValue(totalSample[i]);
155         }
156 
157         // Now compute subsample stats individually and aggregate
158         StreamingStatistics[] subSampleStats = new StreamingStatistics[5];
159         for (int i = 0; i < 4; i++) {
160             subSampleStats[i] = new StreamingStatistics();
161         }
162         Collection<StreamingStatistics> aggregate = new ArrayList<StreamingStatistics>();
163         for (int i = 0; i < 4; i++) {
164             for (int j = 0; j < subSamples[i].length; j++) {
165                 subSampleStats[i].addValue(subSamples[i][j]);
166             }
167             aggregate.add(subSampleStats[i]);
168         }
169 
170         // Compare values
171         StatisticalSummary aggregatedStats = StatisticalSummary.aggregate(aggregate);
172         customAssertStatisticalSummaryEquals(totalStats.getSummary(), aggregatedStats, 10E-12);
173     }
174 
175     /**
176      * Verifies that a StatisticalSummary and a StatisticalSummaryValues are equal up
177      * to delta, with NaNs, infinities returned in the same spots. For max, min, n, values
178      * have to agree exactly, delta is used only for sum, mean, variance, std dev.
179      */
180     protected static void customAssertStatisticalSummaryEquals(StatisticalSummary expected,
181                                                                StatisticalSummary observed,
182                                                                double delta) {
183         UnitTestUtils.customAssertEquals(expected.getMax(), observed.getMax(), 0);
184         UnitTestUtils.customAssertEquals(expected.getMin(), observed.getMin(), 0);
185         assertEquals(expected.getN(), observed.getN());
186         UnitTestUtils.customAssertEquals(expected.getSum(), observed.getSum(), delta);
187         UnitTestUtils.customAssertEquals(expected.getMean(), observed.getMean(), delta);
188         UnitTestUtils.customAssertEquals(expected.getStandardDeviation(), observed.getStandardDeviation(), delta);
189         UnitTestUtils.customAssertEquals(expected.getVariance(), observed.getVariance(), delta);
190     }
191 
192     /**
193      * Generates a random sample of double values.
194      * Sample size is random, between 10 and 100 and values are
195      * uniformly distributed over [-100, 100].
196      *
197      * @return array of random double values
198      */
199     private double[] generateSample() {
200         final RealDistribution uniformDist = new UniformRealDistribution(-100, 100);
201         final RandomDataGenerator randomDataGenerator = new RandomDataGenerator(100);
202         final int sampleSize = randomDataGenerator.nextInt(10,  100);
203         final double[] out = randomDataGenerator.nextDeviates(uniformDist, sampleSize);
204         return out;
205     }
206 
207     /**
208      * Generates a partition of <sample> into up to 5 sequentially selected
209      * subsamples with randomly selected partition points.
210      *
211      * @param sample array to partition
212      * @return rectangular array with rows = subsamples
213      */
214     private double[][] generatePartition(double[] sample) {
215         final RandomDataGenerator randomDataGenerator = new RandomDataGenerator(100);
216         final int length = sample.length;
217         final double[][] out = new double[5][];
218         int cur = 0;          // beginning of current partition segment
219         int offset = 0;       // end of current partition segment
220         int sampleCount = 0;  // number of segments defined
221         for (int i = 0; i < 5; i++) {
222             if (cur == length || offset == length) {
223                 break;
224             }
225             final int next;
226             if (i == 4 || cur == length - 1) {
227                 next = length - 1;
228             } else {
229                 next = randomDataGenerator.nextInt(cur, length - 1);
230             }
231             final int subLength = next - cur + 1;
232             out[i] = new double[subLength];
233             System.arraycopy(sample, offset, out[i], 0, subLength);
234             cur = next + 1;
235             sampleCount++;
236             offset += subLength;
237         }
238         if (sampleCount < 5) {
239             double[][] out2 = new double[sampleCount][];
240             for (int j = 0; j < sampleCount; j++) {
241                 final int curSize = out[j].length;
242                 out2[j] = new double[curSize];
243                 System.arraycopy(out[j], 0, out2[j], 0, curSize);
244             }
245             return out2;
246         } else {
247             return out;
248         }
249     }
250 
251 }