Frequency.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is not the original file distributed by the Apache Software Foundation
 * It has been modified by the Hipparchus project
 */
package org.hipparchus.stat;

import java.io.Serializable;
import java.text.NumberFormat;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.TreeMap;
import java.util.stream.Collectors;

import org.hipparchus.exception.NullArgumentException;
import org.hipparchus.util.MathUtils;

/**
 * Maintains a frequency distribution of Comparable values.
 * <p>
 * The values are ordered using the default (natural order), unless a
 * {@code Comparator} is supplied in the constructor.
 *
 * @see LongFrequency
 * @param <T> the element type
 */
public class Frequency<T extends Comparable<T>> implements Serializable {

    /** Serializable version identifier */
    private static final long serialVersionUID = 20160322L;

    /** underlying collection */
    private final NavigableMap<T, Long> freqTable;

    /**
     * Default constructor.
     */
    public Frequency() {
        freqTable = new TreeMap<>();
    }

    /**
     * Constructor allowing values Comparator to be specified.
     *
     * @param comparator Comparator used to order values
     */
    public Frequency(Comparator<? super T> comparator) {
        freqTable = new TreeMap<>(comparator);
    }

    /**
     * Adds 1 to the frequency count for v.
     *
     * @param v the value to add.
     */
    public void addValue(T v) {
        incrementValue(v, 1);
    }

    /**
     * Increments the frequency count for v.
     *
     * @param v the value to add.
     * @param increment the amount by which the value should be incremented
     */
    public void incrementValue(T v, long increment) {
        Long count = freqTable.getOrDefault(v, Long.valueOf(0));
        freqTable.put(v, Long.valueOf(count.longValue() + increment));
    }

    /** Clears the frequency table */
    public void clear() {
        freqTable.clear();
    }

    /**
     * Returns an Iterator over the set of values that have been added.
     *
     * @return values Iterator
     */
    public Iterator<T> valuesIterator() {
        return freqTable.keySet().iterator();
    }

    /**
     * Return an Iterator over the set of keys and values that have been added.
     * Using the entry set to iterate is more efficient in the case where you
     * need to access respective counts as well as values, since it doesn't
     * require a "get" for every key...the value is provided in the Map.Entry.
     *
     * @return entry set Iterator
     */
    public Iterator<Map.Entry<T, Long>> entrySetIterator() {
        return freqTable.entrySet().iterator();
    }

    //-------------------------------------------------------------------------

    /**
     * Returns the sum of all frequencies.
     *
     * @return the total frequency count.
     */
    public long getSumFreq() {
        return freqTable.values()
                        .stream()
                        .mapToLong(Long::longValue)
                        .sum();
    }

    /**
     * Returns the number of values equal to v.
     * Returns 0 if the value is not comparable.
     *
     * @param v the value to lookup.
     * @return the frequency of v.
     */
    public long getCount(T v) {
        return freqTable.getOrDefault(v, 0L);
    }

    /**
     * Returns the number of values in the frequency table.
     *
     * @return the number of unique values that have been added to the frequency table.
     * @see #valuesIterator()
     */
    public int getUniqueCount(){
        return freqTable.keySet().size();
    }

    /**
     * Returns the percentage of values that are equal to v
     * (as a proportion between 0 and 1).
     * <p>
     * Returns {@code Double.NaN} if no values have been added.
     *
     * @param v the value to lookup
     * @return the proportion of values equal to v
     */
    public double getPct(T v) {
        final long sumFreq = getSumFreq();
        if (sumFreq == 0) {
            return Double.NaN;
        }
        return (double) getCount(v) / (double) sumFreq;
    }

    //-----------------------------------------------------------------------------------------

    /**
     * Returns the cumulative frequency of values less than or equal to v.
     *
     * @param v the value to lookup.
     * @return the proportion of values equal to v
     */
    public long getCumFreq(T v) {
        if (getSumFreq() == 0) {
            return 0;
        }

        NavigableMap<T, Long> headMap = freqTable.headMap(v, true);

        if (headMap.isEmpty()) {
            // v is less than first value
            return 0;
        } else if (headMap.size() == freqTable.size()) {
            // v is greater than or equal to last value
            return getSumFreq();
        }

        return headMap.values()
                      .stream()
                      .mapToLong(Long::longValue)
                      .sum();
    }

    //----------------------------------------------------------------------------------------------

    /**
     * Returns the cumulative percentage of values less than or equal to v
     * (as a proportion between 0 and 1).
     * <p>
     * Returns {@code Double.NaN} if no values have been added.
     *
     * @param v the value to lookup
     * @return the proportion of values less than or equal to v
     */
    public double getCumPct(T v) {
        final long sumFreq = getSumFreq();
        if (sumFreq == 0) {
            return Double.NaN;
        }
        return (double) getCumFreq(v) / (double) sumFreq;
    }

    /**
     * Returns the mode value(s) in comparator order.
     *
     * @return a list containing the value(s) which appear most often.
     */
    public List<T> getMode() {
        // Get the max count first
        final long mostPopular =
                freqTable.values()
                         .stream()
                         .mapToLong(Long::longValue)
                         .max()
                         .orElse(0L);

        return freqTable.entrySet()
                        .stream()
                        .filter(entry -> entry.getValue() == mostPopular)
                        .map(entry -> entry.getKey())
                        .collect(Collectors.toList());
    }

    //----------------------------------------------------------------------------------------------

    /**
     * Merge another Frequency object's counts into this instance.
     * This Frequency's counts will be incremented (or set when not already set)
     * by the counts represented by other.
     *
     * @param other the other {@link Frequency} object to be merged
     * @throws NullArgumentException if {@code other} is null
     */
    public void merge(final Frequency<? extends T> other) throws NullArgumentException {
        MathUtils.checkNotNull(other);

        Iterator<? extends Map.Entry<? extends T, Long>> iter = other.entrySetIterator();
        while (iter.hasNext()) {
            final Map.Entry<? extends T, Long> entry = iter.next();
            incrementValue(entry.getKey(), entry.getValue().longValue());
        }
    }

    /**
     * Merge a {@link Collection} of {@link Frequency} objects into this instance.
     * This Frequency's counts will be incremented (or set when not already set)
     * by the counts represented by each of the others.
     *
     * @param others the other {@link Frequency} objects to be merged
     * @throws NullArgumentException if the collection is null
     */
    public void merge(final Collection<? extends Frequency<? extends T>> others)
        throws NullArgumentException {
        MathUtils.checkNotNull(others);

        for (final Frequency<? extends T> freq : others) {
            merge(freq);
        }
    }

    //----------------------------------------------------------------------------------------------

    /**
     * Return a string representation of this frequency distribution.
     *
     * @return a string representation.
     */
    @Override
    public String toString() {
        NumberFormat nf = NumberFormat.getPercentInstance();
        StringBuilder outBuffer = new StringBuilder(200); // this size is just a wild guess
        outBuffer.append("Value \tFreq. \tPct. \tCum Pct. \n");
        Iterator<T> iter = freqTable.keySet().iterator();
        while (iter.hasNext()) {
            T value = iter.next();
            outBuffer.append(value).
                      append('\t').
                      append(getCount(value)).
                      append('\t').
                      append(nf.format(getPct(value))).
                      append('\t').
                      append(nf.format(getCumPct(value))).
                      append('\n');
        }
        return outBuffer.toString();
    }

    /** {@inheritDoc} */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result +
                 ((freqTable == null) ? 0 : freqTable.hashCode());
        return result;
    }

    /** {@inheritDoc} */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof Frequency)) {
            return false;
        }
        Frequency<?> other = (Frequency<?>) obj;
        return Objects.equals(freqTable, other.freqTable);
    }

}