View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  /*
19   * This is not the original file distributed by the Apache Software Foundation
20   * It has been modified by the Hipparchus project
21   */
22  package org.hipparchus.stat;
23  
24  import java.io.Serializable;
25  import java.text.NumberFormat;
26  import java.util.Collection;
27  import java.util.Comparator;
28  import java.util.Iterator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.NavigableMap;
32  import java.util.Objects;
33  import java.util.TreeMap;
34  import java.util.stream.Collectors;
35  
36  import org.hipparchus.exception.NullArgumentException;
37  import org.hipparchus.util.MathUtils;
38  
39  /**
40   * Maintains a frequency distribution of Comparable values.
41   * <p>
42   * The values are ordered using the default (natural order), unless a
43   * {@code Comparator} is supplied in the constructor.
44   *
45   * @see LongFrequency
46   * @param <T> the element type
47   */
48  public class Frequency<T extends Comparable<T>> implements Serializable {
49  
50      /** Serializable version identifier */
51      private static final long serialVersionUID = 20160322L;
52  
53      /** underlying collection */
54      private final NavigableMap<T, Long> freqTable;
55  
56      /**
57       * Default constructor.
58       */
59      public Frequency() {
60          freqTable = new TreeMap<>();
61      }
62  
63      /**
64       * Constructor allowing values Comparator to be specified.
65       *
66       * @param comparator Comparator used to order values
67       */
68      public Frequency(Comparator<? super T> comparator) {
69          freqTable = new TreeMap<>(comparator);
70      }
71  
72      /**
73       * Adds 1 to the frequency count for v.
74       *
75       * @param v the value to add.
76       */
77      public void addValue(T v) {
78          incrementValue(v, 1);
79      }
80  
81      /**
82       * Increments the frequency count for v.
83       *
84       * @param v the value to add.
85       * @param increment the amount by which the value should be incremented
86       */
87      public void incrementValue(T v, long increment) {
88          Long count = freqTable.getOrDefault(v, Long.valueOf(0));
89          freqTable.put(v, Long.valueOf(count.longValue() + increment));
90      }
91  
92      /** Clears the frequency table */
93      public void clear() {
94          freqTable.clear();
95      }
96  
97      /**
98       * Returns an Iterator over the set of values that have been added.
99       *
100      * @return values Iterator
101      */
102     public Iterator<T> valuesIterator() {
103         return freqTable.keySet().iterator();
104     }
105 
106     /**
107      * Return an Iterator over the set of keys and values that have been added.
108      * Using the entry set to iterate is more efficient in the case where you
109      * need to access respective counts as well as values, since it doesn't
110      * require a "get" for every key...the value is provided in the Map.Entry.
111      *
112      * @return entry set Iterator
113      */
114     public Iterator<Map.Entry<T, Long>> entrySetIterator() {
115         return freqTable.entrySet().iterator();
116     }
117 
118     //-------------------------------------------------------------------------
119 
120     /**
121      * Returns the sum of all frequencies.
122      *
123      * @return the total frequency count.
124      */
125     public long getSumFreq() {
126         return freqTable.values()
127                         .stream()
128                         .mapToLong(Long::longValue)
129                         .sum();
130     }
131 
132     /**
133      * Returns the number of values equal to v.
134      * Returns 0 if the value is not comparable.
135      *
136      * @param v the value to lookup.
137      * @return the frequency of v.
138      */
139     public long getCount(T v) {
140         return freqTable.getOrDefault(v, 0L);
141     }
142 
143     /**
144      * Returns the number of values in the frequency table.
145      *
146      * @return the number of unique values that have been added to the frequency table.
147      * @see #valuesIterator()
148      */
149     public int getUniqueCount(){
150         return freqTable.keySet().size();
151     }
152 
153     /**
154      * Returns the percentage of values that are equal to v
155      * (as a proportion between 0 and 1).
156      * <p>
157      * Returns {@code Double.NaN} if no values have been added.
158      *
159      * @param v the value to lookup
160      * @return the proportion of values equal to v
161      */
162     public double getPct(T v) {
163         final long sumFreq = getSumFreq();
164         if (sumFreq == 0) {
165             return Double.NaN;
166         }
167         return (double) getCount(v) / (double) sumFreq;
168     }
169 
170     //-----------------------------------------------------------------------------------------
171 
172     /**
173      * Returns the cumulative frequency of values less than or equal to v.
174      *
175      * @param v the value to lookup.
176      * @return the proportion of values equal to v
177      */
178     public long getCumFreq(T v) {
179         if (getSumFreq() == 0) {
180             return 0;
181         }
182 
183         NavigableMap<T, Long> headMap = freqTable.headMap(v, true);
184 
185         if (headMap.isEmpty()) {
186             // v is less than first value
187             return 0;
188         } else if (headMap.size() == freqTable.size()) {
189             // v is greater than or equal to last value
190             return getSumFreq();
191         }
192 
193         return headMap.values()
194                       .stream()
195                       .mapToLong(Long::longValue)
196                       .sum();
197     }
198 
199     //----------------------------------------------------------------------------------------------
200 
201     /**
202      * Returns the cumulative percentage of values less than or equal to v
203      * (as a proportion between 0 and 1).
204      * <p>
205      * Returns {@code Double.NaN} if no values have been added.
206      *
207      * @param v the value to lookup
208      * @return the proportion of values less than or equal to v
209      */
210     public double getCumPct(T v) {
211         final long sumFreq = getSumFreq();
212         if (sumFreq == 0) {
213             return Double.NaN;
214         }
215         return (double) getCumFreq(v) / (double) sumFreq;
216     }
217 
218     /**
219      * Returns the mode value(s) in comparator order.
220      *
221      * @return a list containing the value(s) which appear most often.
222      */
223     public List<T> getMode() {
224         // Get the max count first
225         final long mostPopular =
226                 freqTable.values()
227                          .stream()
228                          .mapToLong(Long::longValue)
229                          .max()
230                          .orElse(0L);
231 
232         return freqTable.entrySet()
233                         .stream()
234                         .filter(entry -> entry.getValue() == mostPopular)
235                         .map(entry -> entry.getKey())
236                         .collect(Collectors.toList());
237     }
238 
239     //----------------------------------------------------------------------------------------------
240 
241     /**
242      * Merge another Frequency object's counts into this instance.
243      * This Frequency's counts will be incremented (or set when not already set)
244      * by the counts represented by other.
245      *
246      * @param other the other {@link Frequency} object to be merged
247      * @throws NullArgumentException if {@code other} is null
248      */
249     public void merge(final Frequency<? extends T> other) throws NullArgumentException {
250         MathUtils.checkNotNull(other);
251 
252         Iterator<? extends Map.Entry<? extends T, Long>> iter = other.entrySetIterator();
253         while (iter.hasNext()) {
254             final Map.Entry<? extends T, Long> entry = iter.next();
255             incrementValue(entry.getKey(), entry.getValue().longValue());
256         }
257     }
258 
259     /**
260      * Merge a {@link Collection} of {@link Frequency} objects into this instance.
261      * This Frequency's counts will be incremented (or set when not already set)
262      * by the counts represented by each of the others.
263      *
264      * @param others the other {@link Frequency} objects to be merged
265      * @throws NullArgumentException if the collection is null
266      */
267     public void merge(final Collection<? extends Frequency<? extends T>> others)
268         throws NullArgumentException {
269         MathUtils.checkNotNull(others);
270 
271         for (final Frequency<? extends T> freq : others) {
272             merge(freq);
273         }
274     }
275 
276     //----------------------------------------------------------------------------------------------
277 
278     /**
279      * Return a string representation of this frequency distribution.
280      *
281      * @return a string representation.
282      */
283     @Override
284     public String toString() {
285         NumberFormat nf = NumberFormat.getPercentInstance();
286         StringBuilder outBuffer = new StringBuilder(200); // this size is just a wild guess
287         outBuffer.append("Value \tFreq. \tPct. \tCum Pct. \n");
288         Iterator<T> iter = freqTable.keySet().iterator();
289         while (iter.hasNext()) {
290             T value = iter.next();
291             outBuffer.append(value).
292                       append('\t').
293                       append(getCount(value)).
294                       append('\t').
295                       append(nf.format(getPct(value))).
296                       append('\t').
297                       append(nf.format(getCumPct(value))).
298                       append('\n');
299         }
300         return outBuffer.toString();
301     }
302 
303     /** {@inheritDoc} */
304     @Override
305     public int hashCode() {
306         final int prime = 31;
307         int result = 1;
308         result = prime * result +
309                  ((freqTable == null) ? 0 : freqTable.hashCode());
310         return result;
311     }
312 
313     /** {@inheritDoc} */
314     @Override
315     public boolean equals(Object obj) {
316         if (this == obj) {
317             return true;
318         }
319         if (!(obj instanceof Frequency)) {
320             return false;
321         }
322         Frequency<?> other = (Frequency<?>) obj;
323         return Objects.equals(freqTable, other.freqTable);
324     }
325 
326 }