View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  
23  package org.hipparchus.distribution.discrete;
24  
25  import org.hipparchus.exception.LocalizedCoreFormats;
26  import org.hipparchus.exception.MathIllegalArgumentException;
27  import org.hipparchus.util.FastMath;
28  
29  /**
30   * Implementation of the hypergeometric distribution.
31   *
32   * @see <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">Hypergeometric distribution (Wikipedia)</a>
33   * @see <a href="http://mathworld.wolfram.com/HypergeometricDistribution.html">Hypergeometric distribution (MathWorld)</a>
34   */
35  public class HypergeometricDistribution extends AbstractIntegerDistribution {
36      /** Serializable version identifier. */
37      private static final long serialVersionUID = 20160320L;
38      /** The number of successes in the population. */
39      private final int numberOfSuccesses;
40      /** The population size. */
41      private final int populationSize;
42      /** The sample size. */
43      private final int sampleSize;
44      /** Cached numerical variance */
45      private final double numericalVariance;
46  
47      /**
48       * Construct a new hypergeometric distribution with the specified population
49       * size, number of successes in the population, and sample size.
50       *
51       * @param populationSize Population size.
52       * @param numberOfSuccesses Number of successes in the population.
53       * @param sampleSize Sample size.
54       * @throws MathIllegalArgumentException if {@code numberOfSuccesses < 0}.
55       * @throws MathIllegalArgumentException if {@code populationSize <= 0}.
56       * @throws MathIllegalArgumentException if {@code numberOfSuccesses > populationSize},
57       * or {@code sampleSize > populationSize}.
58       */
59      public HypergeometricDistribution(int populationSize, int numberOfSuccesses, int sampleSize)
60          throws MathIllegalArgumentException {
61          if (populationSize <= 0) {
62              throw new MathIllegalArgumentException(LocalizedCoreFormats.POPULATION_SIZE,
63                                                     populationSize);
64          }
65          if (numberOfSuccesses < 0) {
66              throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SUCCESSES,
67                                                     numberOfSuccesses);
68          }
69          if (sampleSize < 0) {
70              throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SAMPLES,
71                                                     sampleSize);
72          }
73  
74          if (numberOfSuccesses > populationSize) {
75              throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE,
76                                                     numberOfSuccesses, populationSize, true);
77          }
78          if (sampleSize > populationSize) {
79              throw new MathIllegalArgumentException(LocalizedCoreFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE,
80                                                     sampleSize, populationSize, true);
81          }
82  
83          this.numberOfSuccesses = numberOfSuccesses;
84          this.populationSize = populationSize;
85          this.sampleSize = sampleSize;
86          this.numericalVariance = calculateNumericalVariance();
87      }
88  
89      /** {@inheritDoc} */
90      @Override
91      public double cumulativeProbability(int x) {
92          double ret;
93  
94          int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
95          if (x < domain[0]) {
96              ret = 0.0;
97          } else if (x >= domain[1]) {
98              ret = 1.0;
99          } else {
100             ret = innerCumulativeProbability(domain[0], x, 1);
101         }
102 
103         return ret;
104     }
105 
106     /**
107      * Return the domain for the given hypergeometric distribution parameters.
108      *
109      * @param n Population size.
110      * @param m Number of successes in the population.
111      * @param k Sample size.
112      * @return a two element array containing the lower and upper bounds of the
113      * hypergeometric distribution.
114      */
115     private int[] getDomain(int n, int m, int k) {
116         return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
117     }
118 
119     /**
120      * Return the lowest domain value for the given hypergeometric distribution
121      * parameters.
122      *
123      * @param n Population size.
124      * @param m Number of successes in the population.
125      * @param k Sample size.
126      * @return the lowest domain value of the hypergeometric distribution.
127      */
128     private int getLowerDomain(int n, int m, int k) {
129         return FastMath.max(0, m - (n - k));
130     }
131 
132     /**
133      * Access the number of successes.
134      *
135      * @return the number of successes.
136      */
137     public int getNumberOfSuccesses() {
138         return numberOfSuccesses;
139     }
140 
141     /**
142      * Access the population size.
143      *
144      * @return the population size.
145      */
146     public int getPopulationSize() {
147         return populationSize;
148     }
149 
150     /**
151      * Access the sample size.
152      *
153      * @return the sample size.
154      */
155     public int getSampleSize() {
156         return sampleSize;
157     }
158 
159     /**
160      * Return the highest domain value for the given hypergeometric distribution
161      * parameters.
162      *
163      * @param m Number of successes in the population.
164      * @param k Sample size.
165      * @return the highest domain value of the hypergeometric distribution.
166      */
167     private int getUpperDomain(int m, int k) {
168         return FastMath.min(k, m);
169     }
170 
171     /** {@inheritDoc} */
172     @Override
173     public double probability(int x) {
174         final double logProbability = logProbability(x);
175         return logProbability == Double.NEGATIVE_INFINITY ? 0 : FastMath.exp(logProbability);
176     }
177 
178     /** {@inheritDoc} */
179     @Override
180     public double logProbability(int x) {
181         double ret;
182 
183         int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
184         if (x < domain[0] || x > domain[1]) {
185             ret = Double.NEGATIVE_INFINITY;
186         } else {
187             double p = ((double) sampleSize) / populationSize;
188             double q = ((double) (populationSize - sampleSize)) / populationSize;
189             double p1 = SaddlePointExpansion.logBinomialProbability(x, numberOfSuccesses, p, q);
190             double p2 = SaddlePointExpansion.logBinomialProbability(sampleSize - x, populationSize - numberOfSuccesses, p, q);
191             double p3 = SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
192             ret = p1 + p2 - p3;
193         }
194 
195         return ret;
196     }
197 
198     /**
199      * For this distribution, {@code X}, this method returns {@code P(X >= x)}.
200      *
201      * @param x Value at which the CDF is evaluated.
202      * @return the upper tail CDF for this distribution.
203      */
204     public double upperCumulativeProbability(int x) {
205         double ret;
206 
207         final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
208         if (x <= domain[0]) {
209             ret = 1.0;
210         } else if (x > domain[1]) {
211             ret = 0.0;
212         } else {
213             ret = innerCumulativeProbability(domain[1], x, -1);
214         }
215 
216         return ret;
217     }
218 
219     /**
220      * For this distribution, {@code X}, this method returns
221      * {@code P(x0 <= X <= x1)}.
222      * This probability is computed by summing the point probabilities for the
223      * values {@code x0, x0 + 1, x0 + 2, ..., x1}, in the order directed by
224      * {@code dx}.
225      *
226      * @param x0 Inclusive lower bound.
227      * @param x1 Inclusive upper bound.
228      * @param dx Direction of summation (1 indicates summing from x0 to x1, and
229      * 0 indicates summing from x1 to x0).
230      * @return {@code P(x0 <= X <= x1)}.
231      */
232     private double innerCumulativeProbability(int x0, int x1, int dx) {
233         double ret = probability(x0);
234         while (x0 != x1) {
235             x0 += dx;
236             ret += probability(x0);
237         }
238         return ret;
239     }
240 
241     /**
242      * {@inheritDoc}
243      *
244      * For population size {@code N}, number of successes {@code m}, and sample
245      * size {@code n}, the mean is {@code n * m / N}.
246      */
247     @Override
248     public double getNumericalMean() {
249         return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize());
250     }
251 
252     /**
253      * {@inheritDoc}
254      *
255      * For population size {@code N}, number of successes {@code m}, and sample
256      * size {@code n}, the variance is
257      * {@code [n * m * (N - n) * (N - m)] / [N^2 * (N - 1)]}.
258      */
259     @Override
260     public double getNumericalVariance() {
261         return numericalVariance;
262     }
263 
264     /**
265      * Calculate the numerical variance.
266      *
267      * @return the variance of this distribution
268      */
269     private double calculateNumericalVariance() {
270         final double N = getPopulationSize();
271         final double m = getNumberOfSuccesses();
272         final double n = getSampleSize();
273         return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
274     }
275 
276     /**
277      * {@inheritDoc}
278      *
279      * For population size {@code N}, number of successes {@code m}, and sample
280      * size {@code n}, the lower bound of the support is
281      * {@code max(0, n + m - N)}.
282      *
283      * @return lower bound of the support
284      */
285     @Override
286     public int getSupportLowerBound() {
287         return FastMath.max(0,
288                             getSampleSize() + getNumberOfSuccesses() - getPopulationSize());
289     }
290 
291     /**
292      * {@inheritDoc}
293      *
294      * For number of successes {@code m} and sample size {@code n}, the upper
295      * bound of the support is {@code min(m, n)}.
296      *
297      * @return upper bound of the support
298      */
299     @Override
300     public int getSupportUpperBound() {
301         return FastMath.min(getNumberOfSuccesses(), getSampleSize());
302     }
303 
304     /**
305      * {@inheritDoc}
306      *
307      * The support of this distribution is connected.
308      *
309      * @return {@code true}
310      */
311     @Override
312     public boolean isSupportConnected() {
313         return true;
314     }
315 }