View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat.interval;
23  
24  import org.hipparchus.distribution.continuous.FDistribution;
25  import org.hipparchus.distribution.continuous.NormalDistribution;
26  import org.hipparchus.exception.LocalizedCoreFormats;
27  import org.hipparchus.exception.MathIllegalArgumentException;
28  import org.hipparchus.stat.LocalizedStatFormats;
29  import org.hipparchus.util.FastMath;
30  import org.hipparchus.util.MathUtils;
31  
32  /**
33   * Utility methods to generate confidence intervals for a binomial proportion.
34   *
35   * @see
36   * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval">
37   * Binomial proportion confidence interval (Wikipedia)</a>
38   */
39  public class BinomialProportion {
40  
41      /**
42       * The standard normal distribution to calculate the inverse cumulative probability.
43       * Accessed and used in a thread-safe way.
44       */
45      private static final NormalDistribution NORMAL_DISTRIBUTION = new NormalDistribution(0, 1);
46  
47      /** Utility class, prevent instantiation. */
48      private BinomialProportion() {}
49  
50      /**
51       * Create an Agresti-Coull binomial confidence interval for the true
52       * probability of success of an unknown binomial distribution with
53       * the given observed number of trials, probability of success and
54       * confidence level.
55       * <p>
56       * Preconditions:
57       * <ul>
58       * <li>{@code numberOfTrials} must be positive</li>
59       * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
60       * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
61       * </ul>
62       *
63       * @see
64       * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti-Coull_Interval">
65       * Agresti-Coull interval (Wikipedia)</a>
66       *
67       * @param numberOfTrials number of trials
68       * @param probabilityOfSuccess observed probability of success
69       * @param confidenceLevel desired probability that the true probability of
70       * success falls within the returned interval
71       * @return Confidence interval containing the probability of success with
72       * probability {@code confidenceLevel}
73       * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
74       * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
75       * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
76       */
77      public static ConfidenceInterval getAgrestiCoullInterval(int numberOfTrials,
78                                                               double probabilityOfSuccess,
79                                                               double confidenceLevel)
80          throws MathIllegalArgumentException {
81  
82          checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);
83  
84          final int numberOfSuccesses = (int) (numberOfTrials * probabilityOfSuccess);
85  
86          final double alpha = (1.0 - confidenceLevel) / 2;
87          final double z = NORMAL_DISTRIBUTION.inverseCumulativeProbability(1 - alpha);
88          final double zSquared = FastMath.pow(z, 2);
89          final double modifiedNumberOfTrials = numberOfTrials + zSquared;
90          final double modifiedSuccessesRatio = (1.0 / modifiedNumberOfTrials) *
91                                                (numberOfSuccesses + 0.5 * zSquared);
92          final double difference = z * FastMath.sqrt(1.0 / modifiedNumberOfTrials *
93                                                      modifiedSuccessesRatio *
94                                                      (1 - modifiedSuccessesRatio));
95  
96          return new ConfidenceInterval(modifiedSuccessesRatio - difference,
97                                        modifiedSuccessesRatio + difference,
98                                        confidenceLevel);
99      }
100 
101     /**
102      * Create a Clopper-Pearson binomial confidence interval for the true
103      * probability of success of an unknown binomial distribution with
104      * the given observed number of trials, probability of success and
105      * confidence level.
106      * <p>
107      * Preconditions:
108      * <ul>
109      * <li>{@code numberOfTrials} must be positive</li>
110      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
111      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
112      * </ul>
113      *
114      * @see
115      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Clopper-Pearson_interval">
116      * Clopper-Pearson interval (Wikipedia)</a>
117      *
118      * @param numberOfTrials number of trials
119      * @param probabilityOfSuccess observed probability of success
120      * @param confidenceLevel desired probability that the true probability of
121      * success falls within the returned interval
122      * @return Confidence interval containing the probability of success with
123      * probability {@code confidenceLevel}
124      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
125      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
126      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
127      */
128     public static ConfidenceInterval getClopperPearsonInterval(int numberOfTrials,
129                                                                double probabilityOfSuccess,
130                                                                double confidenceLevel)
131         throws MathIllegalArgumentException {
132 
133         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);
134 
135         double lowerBound = 0;
136         double upperBound = 0;
137         final int numberOfSuccesses = (int) (numberOfTrials * probabilityOfSuccess);
138 
139         if (numberOfSuccesses > 0) {
140             final double alpha = (1.0 - confidenceLevel) / 2.0;
141 
142             final FDistribution distributionLowerBound =
143                     new FDistribution(2 * (numberOfTrials - numberOfSuccesses + 1),
144                                       2 * numberOfSuccesses);
145 
146             final double fValueLowerBound =
147                     distributionLowerBound.inverseCumulativeProbability(1 - alpha);
148             lowerBound = numberOfSuccesses /
149                          (numberOfSuccesses + (numberOfTrials - numberOfSuccesses + 1) * fValueLowerBound);
150 
151             final FDistribution distributionUpperBound =
152                     new FDistribution(2 * (numberOfSuccesses + 1),
153                                       2 * (numberOfTrials - numberOfSuccesses));
154 
155             final double fValueUpperBound =
156                     distributionUpperBound.inverseCumulativeProbability(1 - alpha);
157             upperBound = (numberOfSuccesses + 1) * fValueUpperBound /
158                          (numberOfTrials - numberOfSuccesses + (numberOfSuccesses + 1) * fValueUpperBound);
159         }
160 
161         return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
162     }
163 
164     /**
165      * Create a binomial confidence interval using normal approximation
166      * for the true probability of success of an unknown binomial distribution
167      * with the given observed number of trials, probability of success and
168      * confidence level.
169      * <p>
170      * Preconditions:
171      * <ul>
172      * <li>{@code numberOfTrials} must be positive</li>
173      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
174      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
175      * </ul>
176      *
177      * @see
178      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Normal_approximation_interval">
179      * Normal approximation interval (Wikipedia)</a>
180      *
181      * @param numberOfTrials number of trials
182      * @param probabilityOfSuccess observed probability of success
183      * @param confidenceLevel desired probability that the true probability of
184      * success falls within the returned interval
185      * @return Confidence interval containing the probability of success with
186      * probability {@code confidenceLevel}
187      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
188      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
189      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
190      */
191     public static ConfidenceInterval getNormalApproximationInterval(int numberOfTrials,
192                                                                     double probabilityOfSuccess,
193                                                                     double confidenceLevel)
194         throws MathIllegalArgumentException {
195 
196         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);
197 
198         final double mean = probabilityOfSuccess;
199         final double alpha = (1.0 - confidenceLevel) / 2;
200 
201         final double difference = NORMAL_DISTRIBUTION.inverseCumulativeProbability(1 - alpha) *
202                                   FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean));
203         return new ConfidenceInterval(mean - difference, mean + difference, confidenceLevel);
204     }
205 
206     /**
207      * Create an Wilson score binomial confidence interval for the true
208      * probability of success of an unknown binomial distribution with
209      * the given observed number of trials, probability of success and
210      * confidence level.
211      * <p>
212      * Preconditions:
213      * <ul>
214      * <li>{@code numberOfTrials} must be positive</li>
215      * <li>{@code probabilityOfSuccess} must be between 0 and 1 (inclusive)</li>
216      * <li>{@code confidenceLevel} must be strictly between 0 and 1 (exclusive)</li>
217      * </ul>
218      *
219      * @see
220      * <a href="http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Wilson_score_interval">
221      * Wilson score interval (Wikipedia)</a>
222      *
223      * @param numberOfTrials number of trials
224      * @param probabilityOfSuccess observed probability of success
225      * @param confidenceLevel desired probability that the true probability of
226      * success falls within the returned interval
227      * @return Confidence interval containing the probability of success with
228      * probability {@code confidenceLevel}
229      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
230      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess} is not in the interval [0, 1].
231      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1).
232      */
233     public static ConfidenceInterval getWilsonScoreInterval(int numberOfTrials,
234                                                             double probabilityOfSuccess,
235                                                             double confidenceLevel)
236         throws MathIllegalArgumentException {
237 
238         checkParameters(numberOfTrials, probabilityOfSuccess, confidenceLevel);
239 
240         final double alpha = (1.0 - confidenceLevel) / 2;
241         final double z = NORMAL_DISTRIBUTION.inverseCumulativeProbability(1 - alpha);
242         final double zSquared = FastMath.pow(z, 2);
243         final double mean = probabilityOfSuccess;
244 
245         final double factor = 1.0 / (1 + (1.0 / numberOfTrials) * zSquared);
246         final double modifiedSuccessRatio = mean + (1.0 / (2 * numberOfTrials)) * zSquared;
247         final double difference =
248                 z * FastMath.sqrt(1.0 / numberOfTrials * mean * (1 - mean) +
249                                   (1.0 / (4 * FastMath.pow(numberOfTrials, 2)) * zSquared));
250 
251         final double lowerBound = factor * (modifiedSuccessRatio - difference);
252         final double upperBound = factor * (modifiedSuccessRatio + difference);
253         return new ConfidenceInterval(lowerBound, upperBound, confidenceLevel);
254     }
255 
256     /**
257      * Verifies that parameters satisfy preconditions.
258      *
259      * @param numberOfTrials number of trials (must be positive)
260      * @param probabilityOfSuccess probability of successes (must be between 0 and 1)
261      * @param confidenceLevel confidence level (must be strictly between 0 and 1)
262      * @throws MathIllegalArgumentException if {@code numberOfTrials <= 0}.
263      * @throws MathIllegalArgumentException if {@code probabilityOfSuccess is not in the interval [0, 1]}.
264      * @throws MathIllegalArgumentException if {@code confidenceLevel} is not in the interval (0, 1)}.
265      */
266     private static void checkParameters(int numberOfTrials,
267                                         double probabilityOfSuccess,
268                                         double confidenceLevel) {
269         if (numberOfTrials <= 0) {
270             throw new MathIllegalArgumentException(LocalizedCoreFormats.NUMBER_OF_TRIALS,
271                                                    numberOfTrials);
272         }
273         MathUtils.checkRangeInclusive(probabilityOfSuccess, 0, 1);
274         if (confidenceLevel <= 0 || confidenceLevel >= 1) {
275             throw new MathIllegalArgumentException(LocalizedStatFormats.OUT_OF_BOUNDS_CONFIDENCE_LEVEL,
276                                                    confidenceLevel, 0, 1);
277         }
278     }
279 
280 }