View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.distribution.continuous;
23  
24  import java.util.ArrayList;
25  import java.util.HashMap;
26  import java.util.List;
27  import java.util.Map;
28  import java.util.Map.Entry;
29  
30  import org.hipparchus.distribution.EnumeratedDistribution;
31  import org.hipparchus.exception.MathIllegalArgumentException;
32  import org.hipparchus.util.MathArrays;
33  import org.hipparchus.util.MathUtils;
34  import org.hipparchus.util.Pair;
35  
36  /**
37   * Implementation of a real-valued {@link EnumeratedDistribution}.
38   * <p>
39   * Values with zero-probability are allowed but they do not extend the
40   * support.
41   * <p>
42   * Duplicate values are allowed. Probabilities of duplicate values are
43   * combined when computing cumulative probabilities and statistics.
44   */
45  public class EnumeratedRealDistribution extends AbstractRealDistribution {
46  
47      /** Serializable UID. */
48      private static final long serialVersionUID = 20130308L;
49  
50      /**
51       * {@link EnumeratedDistribution} (using the {@link Double} wrapper)
52       * used to generate the pmf.
53       */
54      private final EnumeratedDistribution<Double> innerDistribution;
55  
56      /**
57       * Create a discrete real-valued distribution from the input data.  Values are assigned
58       * mass based on their frequency.  For example, [0,1,1,2] as input creates a distribution
59       * with values 0, 1 and 2 having probability masses 0.25, 0.5 and 0.25 respectively,
60       *
61       * @param data input dataset
62       */
63      public EnumeratedRealDistribution(final double[] data) {
64          super();
65          final Map<Double, Integer> dataMap = new HashMap<>();
66          for (double value : data) {
67              Integer count = dataMap.get(value);
68              if (count == null) {
69                  count = 0;
70              }
71              dataMap.put(value, ++count);
72          }
73          final int massPoints = dataMap.size();
74          final double denom = data.length;
75          final double[] values = new double[massPoints];
76          final double[] probabilities = new double[massPoints];
77          int index = 0;
78          for (Entry<Double, Integer> entry : dataMap.entrySet()) {
79              values[index] = entry.getKey();
80              probabilities[index] = entry.getValue().intValue() / denom;
81              index++;
82          }
83          innerDistribution =
84                  new EnumeratedDistribution<>(createDistribution(values, probabilities));
85      }
86  
87      /**
88       * Create a discrete real-valued distribution using the given probability mass function
89       * enumeration.
90       *
91       * @param singletons array of random variable values.
92       * @param probabilities array of probabilities.
93       * @throws MathIllegalArgumentException if
94       * {@code singletons.length != probabilities.length}
95       * @throws MathIllegalArgumentException if any of the probabilities are negative.
96       * @throws MathIllegalArgumentException if any of the probabilities are NaN.
97       * @throws MathIllegalArgumentException if any of the probabilities are infinite.
98       */
99      public EnumeratedRealDistribution(final double[] singletons, final double[] probabilities)
100         throws MathIllegalArgumentException {
101         super();
102         innerDistribution =
103                 new EnumeratedDistribution<>(createDistribution(singletons, probabilities));
104     }
105 
106 
107     /**
108      * Create the list of Pairs representing the distribution from singletons and probabilities.
109      *
110      * @param singletons values
111      * @param probabilities probabilities
112      * @return list of value/probability pairs
113      * @throws MathIllegalArgumentException if probabilities contains negative, infinite or NaN values or only 0's
114      */
115     private static List<Pair<Double, Double>> createDistribution(double[] singletons,
116                                                                  double[] probabilities) {
117         MathArrays.checkEqualLength(singletons, probabilities);
118         final List<Pair<Double, Double>> samples = new ArrayList<>(singletons.length);
119 
120         final double[] normalizedProbabilities = EnumeratedDistribution.checkAndNormalize(probabilities);
121         for (int i = 0; i < singletons.length; i++) {
122             samples.add(new Pair<>(singletons[i], normalizedProbabilities[i]));
123         }
124         return samples;
125     }
126 
127     /**
128      * For a random variable {@code X} whose values are distributed according to
129      * this distribution, this method returns {@code P(X = x)}. In other words,
130      * this method represents the probability mass function (PMF) for the
131      * distribution.
132      * <p>
133      * Note that if {@code x1} and {@code x2} satisfy {@code x1.equals(x2)},
134      * or both are null, then {@code probability(x1) = probability(x2)}.
135      *
136      * @param x the point at which the PMF is evaluated
137      * @return the value of the probability mass function at {@code x}
138      */
139     public double probability(final double x) {
140         return innerDistribution.probability(x);
141     }
142 
143     /**
144      * For a random variable {@code X} whose values are distributed according to
145      * this distribution, this method returns {@code P(X = x)}. In other words,
146      * this method represents the probability mass function (PMF) for the
147      * distribution.
148      *
149      * @param x the point at which the PMF is evaluated
150      * @return the value of the probability mass function at point {@code x}
151      */
152     @Override
153     public double density(final double x) {
154         return probability(x);
155     }
156 
157     /**
158      * {@inheritDoc}
159      */
160     @Override
161     public double cumulativeProbability(final double x) {
162         double probability = 0;
163 
164         for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
165             if (sample.getKey() <= x) {
166                 probability += sample.getValue();
167             }
168         }
169 
170         return probability;
171     }
172 
173     /**
174      * {@inheritDoc}
175      */
176     @Override
177     public double inverseCumulativeProbability(final double p) throws MathIllegalArgumentException {
178         MathUtils.checkRangeInclusive(p, 0, 1);
179 
180         double probability = 0;
181         double x = getSupportLowerBound();
182         for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
183             if (sample.getValue() == 0.0) {
184                 continue;
185             }
186 
187             probability += sample.getValue();
188             x = sample.getKey();
189 
190             if (probability >= p) {
191                 break;
192             }
193         }
194 
195         return x;
196     }
197 
198     /**
199      * {@inheritDoc}
200      *
201      * @return {@code sum(singletons[i] * probabilities[i])}
202      */
203     @Override
204     public double getNumericalMean() {
205         double mean = 0;
206 
207         for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
208             mean += sample.getValue() * sample.getKey();
209         }
210 
211         return mean;
212     }
213 
214     /**
215      * {@inheritDoc}
216      *
217      * @return {@code sum((singletons[i] - mean) ^ 2 * probabilities[i])}
218      */
219     @Override
220     public double getNumericalVariance() {
221         double mean = 0;
222         double meanOfSquares = 0;
223 
224         for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
225             mean += sample.getValue() * sample.getKey();
226             meanOfSquares += sample.getValue() * sample.getKey() * sample.getKey();
227         }
228 
229         return meanOfSquares - mean * mean;
230     }
231 
232     /**
233      * {@inheritDoc}
234      *
235      * Returns the lowest value with non-zero probability.
236      *
237      * @return the lowest value with non-zero probability.
238      */
239     @Override
240     public double getSupportLowerBound() {
241         double min = Double.POSITIVE_INFINITY;
242         for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
243             if (sample.getKey() < min && sample.getValue() > 0) {
244                 min = sample.getKey();
245             }
246         }
247 
248         return min;
249     }
250 
251     /**
252      * {@inheritDoc}
253      *
254      * Returns the highest value with non-zero probability.
255      *
256      * @return the highest value with non-zero probability.
257      */
258     @Override
259     public double getSupportUpperBound() {
260         double max = Double.NEGATIVE_INFINITY;
261         for (final Pair<Double, Double> sample : innerDistribution.getPmf()) {
262             if (sample.getKey() > max && sample.getValue() > 0) {
263                 max = sample.getKey();
264             }
265         }
266 
267         return max;
268     }
269 
270     /**
271      * {@inheritDoc}
272      *
273      * The support of this distribution is connected.
274      *
275      * @return {@code true}
276      */
277     @Override
278     public boolean isSupportConnected() {
279         return true;
280     }
281 
282     /**
283      * Return the probability mass function as a list of (value, probability) pairs.
284      *
285      * @return the probability mass function.
286      */
287     public List<Pair<Double, Double>> getPmf() {
288         return innerDistribution.getPmf();
289     }
290 }