[prev in list] [next in list] [prev in thread] [next in thread]
List: jakarta-commons-dev
Subject: (commons-statistics) 01/04: STATISTICS-71: Add DoubleStatistics aggregator of multiple statistics
From: aherbert () apache ! org
Date: 2023-11-30 16:10:27
Message-ID: 20231130161026.4B5DD440725 () gitbox2-he-fi ! apache ! org
[Download RAW message or body]
This is an automated email from the ASF dual-hosted git repository.
aherbert pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-statistics.git
commit 6e841006455e0175d907aee572f4a5a448e530b0
Author: aherbert <aherbert@apache.org>
AuthorDate: Fri Nov 17 16:27:36 2023 +0000
STATISTICS-71: Add DoubleStatistics aggregator of multiple statistics
---
.../statistics/descriptive/DoubleStatistics.java | 686 +++++++++++++++++++++
.../statistics/descriptive/GeometricMean.java | 17 +-
.../commons/statistics/descriptive/Kurtosis.java | 2 +-
.../commons/statistics/descriptive/Mean.java | 2 +-
.../commons/statistics/descriptive/Skewness.java | 2 +-
.../statistics/descriptive/StandardDeviation.java | 2 +-
.../commons/statistics/descriptive/Statistic.java | 58 ++
.../commons/statistics/descriptive/Variance.java | 2 +-
.../descriptive/DoubleStatisticsTest.java | 495 +++++++++++++++
.../statistics/descriptive/UserGuideTest.java | 93 +++
src/conf/checkstyle/checkstyle-suppressions.xml | 1 +
src/conf/pmd/pmd-ruleset.xml | 5 +-
12 files changed, 1355 insertions(+), 10 deletions(-)
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistics.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistics.java
new file mode 100644
index 0000000..67122cf
--- /dev/null
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/DoubleStatistics.java
@@ -0,0 +1,686 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+import java.util.Objects;
+import java.util.Set;
+import java.util.function.DoubleConsumer;
+import java.util.function.DoubleSupplier;
+import java.util.function.Function;
+
+/**
+ * Statistics for {@code double} values.
+ *
+ * <p>This class provides combinations of individual statistic implementations in the
+ * {@code org.apache.commons.statistics.descriptive} package.
+ *
+ * @since 1.1
+ */
+public final class DoubleStatistics implements DoubleConsumer {
+ /** A no-operation double consumer. This is exposed for testing. */
+ static final DoubleConsumer NOOP = new DoubleConsumer() {
+ @Override
+ public void accept(double value) {
+ // Do nothing
+ }
+
+ @Override
+ public DoubleConsumer andThen(DoubleConsumer after) {
+ // Delegate to the after consumer
+ return after;
+ }
+ };
+ /** Error message for non configured statistics. */
+ private static final String NO_CONFIGURED_STATISTICS = "No configured statistics";
+ /** Error message for an unsupported statistic. */
+ private static final String UNSUPPORTED_STATISTIC = "Unsupported statistic: ";
+ /** Error message for an incompatible statistics. */
+ private static final String INCOMPATIBLE_STATISTICS = "Incompatible statistics";
+
+ /** Count of values recorded. */
+ private long count;
+ /** The consumer of values. */
+ private final DoubleConsumer consumer;
+ /** The {@link Min} implementation. */
+ private final Min min;
+ /** The {@link Max} implementation. */
+ private final Max max;
+ /** The moment implementation. May be any instance of {@link FirstMoment}. */
+ private final FirstMoment moment;
+ /** The {@link Sum} implementation. */
+ private final Sum sum;
+ /** The {@link Product} implementation. */
+ private final Product product;
+ /** The {@link SumOfSquares} implementation. */
+ private final SumOfSquares sumOfSquares;
+ /** The {@link SumOfLogs} implementation. */
+ private final SumOfLogs sumOfLogs;
+
+ /**
+ * A builder for {@link DoubleStatistics}.
+ */
+ public static final class Builder {
+ /** An empty double array. */
+ private static final double[] NO_VALUES = {};
+
+ /** The {@link Min} constructor. */
+ private Function<double[], Min> min;
+ /** The {@link Max} constructor. */
+ private Function<double[], Max> max;
+ /** The moment constructor. May return any instance of {@link FirstMoment}. */
+ private Function<double[], FirstMoment> moment;
+ /** The {@link Sum} constructor. */
+ private Function<double[], Sum> sum;
+ /** The {@link Product} constructor. */
+ private Function<double[], Product> product;
+ /** The {@link SumOfSquares} constructor. */
+ private Function<double[], SumOfSquares> sumOfSquares;
+ /** The {@link SumOfLogs} constructor. */
+ private Function<double[], SumOfLogs> sumOfLogs;
+ /** The order of the moment. It corresponds to the power computed by the {@link FirstMoment}
+ * instance constructed by {@link #moment}. This should only be increased from the default
+ * of zero (corresponding to no moment computation). */
+ private int momentOrder;
+
+ /**
+ * Create an instance.
+ */
+ Builder() {
+ // Do nothing
+ }
+
+ /**
+ * Add the statistic to the statistics to compute.
+ *
+ * @param statistic Statistic to compute.
+ * @return {@code this} instance
+ */
+ Builder add(Statistic statistic) {
+ switch (statistic) {
+ case GEOMETRIC_MEAN:
+ case SUM_OF_LOGS:
+ sumOfLogs = SumOfLogs::of;
+ break;
+ case KURTOSIS:
+ createMoment(4);
+ break;
+ case MAX:
+ max = Max::of;
+ break;
+ case MEAN:
+ createMoment(1);
+ break;
+ case MIN:
+ min = Min::of;
+ break;
+ case PRODUCT:
+ product = Product::of;
+ break;
+ case SKEWNESS:
+ createMoment(3);
+ break;
+ case STANDARD_DEVIATION:
+ case VARIANCE:
+ createMoment(2);
+ break;
+ case SUM:
+ sum = Sum::of;
+ break;
+ case SUM_OF_SQUARES:
+ sumOfSquares = SumOfSquares::of;
+ break;
+ default:
+ throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
+ }
+ return this;
+ }
+
+ /**
+ * Creates the moment constructor for the specified {@code order},
+ * e.g. order=2 is sum of squared deviations.
+ *
+ * @param order Order.
+ */
+ private void createMoment(int order) {
+ if (order > momentOrder) {
+ momentOrder = order;
+ if (order == 4) {
+ moment = SumOfFourthDeviations::of;
+ } else if (order == 3) {
+ moment = SumOfCubedDeviations::of;
+ } else if (order == 2) {
+ moment = SumOfSquaredDeviations::of;
+ } else {
+ // Assume order == 1
+ moment = FirstMoment::of;
+ }
+ }
+ }
+
+ /**
+ * Builds a {@code DoubleStatistics} instance.
+ *
+ * @return {@code DoubleStatistics} instance.
+ */
+ public DoubleStatistics build() {
+ return build(NO_VALUES);
+ }
+
+ /**
+ * Builds a {@code DoubleStatistics} instance using the input {@code values}.
+ *
+ * <p>Note: {@code DoubleStatistics} computed using
+ * {@link DoubleStatistics#accept(double) accept} may be
+ * different from this instance.
+ *
+ * @param values Values.
+ * @return {@code DoubleStatistics} instance.
+ */
+ public DoubleStatistics build(double... values) {
+ Objects.requireNonNull(values, "values");
+ return new DoubleStatistics(
+ values.length,
+ create(min, values),
+ create(max, values),
+ create(moment, values),
+ create(sum, values),
+ create(product, values),
+ create(sumOfSquares, values),
+ create(sumOfLogs, values));
+ }
+
+ /**
+ * Creates the object from the {@code values}.
+ *
+ * @param <T> object type
+ * @param constructor Constructor.
+ * @param values Values
+ * @return the instance
+ */
+ private static <T> T create(Function<double[], T> constructor, double[] values) {
+ if (constructor != null) {
+ return constructor.apply(values);
+ }
+ return null;
+ }
+ }
+
+ /**
+ * Create an instance.
+ *
+ * @param count Count of values.
+ * @param min Min implementation.
+ * @param max Max implementation.
+ * @param moment Moment implementation.
+ * @param sum Sum implementation.
+ * @param product Product implementation.
+ * @param sumOfSquares Sum of squares implementation.
+ * @param sumOfLogs Sum of logs implementation.
+ */
+ DoubleStatistics(long count, Min min, Max max, FirstMoment moment, Sum sum,
+ Product product, SumOfSquares sumOfSquares, SumOfLogs sumOfLogs) {
+ this.count = count;
+ this.min = min;
+ this.max = max;
+ this.moment = moment;
+ this.sum = sum;
+ this.product = product;
+ this.sumOfSquares = sumOfSquares;
+ this.sumOfLogs = sumOfLogs;
+ consumer = compose(min, max, moment, sum, product, sumOfSquares, sumOfLogs);
+ }
+
+ /**
+ * Chain the {@code consumers} into a single composite consumer. Ignore any {@code null}
+ * consumer.
+ *
+ * @param consumers Consumers.
+ * @return a composed consumer
+ */
+ private static DoubleConsumer compose(DoubleConsumer... consumers) {
+ DoubleConsumer action = NOOP;
+ for (final DoubleConsumer consumer : consumers) {
+ if (consumer != null) {
+ action = action.andThen(consumer);
+ }
+ }
+ if (action == NOOP) {
+ // This should not be possible
+ throw new IllegalStateException(NO_CONFIGURED_STATISTICS + ": Please file a bug report");
+ }
+ return action;
+ }
+
+ /**
+ * Returns a new instance configured to compute the specified {@code statistics}.
+ *
+ * <p>The statistics will be empty and so will return the default values for each
+ * computed statistic.
+ *
+ * @param statistics Statistics to compute.
+ * @return the instance
+ * @throws IllegalArgumentException if there are no {@code statistics} to compute.
+ */
+ public static DoubleStatistics of(Statistic... statistics) {
+ return builder(statistics).build();
+ }
+
+ /**
+ * Returns a new instance configured to compute the specified {@code statistics}
+ * populated using the input {@code values}.
+ *
+ * <p>Use this method to create an instance populated with a (variable) array of
+ * {@code double[]} data:
+ *
+ * <pre>
+ * DoubleStatistics stats = DoubleStatistics.of(
+ * EnumSet.of(Statistic.MIN, Statistic.MAX),
+ * 1, 1, 2, 3, 5, 8, 13);
+ * </pre>
+ *
+ * @param statistics Statistics to compute.
+ * @param values Values.
+ * @return the instance
+ * @throws IllegalArgumentException if there are no {@code statistics} to compute.
+ */
+ public static DoubleStatistics of(Set<Statistic> statistics, double... values) {
+ if (statistics.isEmpty()) {
+ throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
+ }
+ final Builder b = new Builder();
+ statistics.forEach(b::add);
+ return b.build(values);
+ }
+
+ /**
+ * Returns a new builder configured to create instances to compute the specified
+ * {@code statistics}.
+ *
+ * <p>Use this method to create an instance populated with an array of {@code double[]}
+ * data using the {@link Builder#build(double...)} method:
+ *
+ * <pre>
+ * double[] data = ...
+ * DoubleStatistics stats = DoubleStatistics.builder(
+ * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
+ * .build(data);
+ * </pre>
+ *
+ * <p>The builder can be used to create multiple instances of {@link DoubleStatistics}
+ * to be used in parallel, or on separate arrays of {@code double[]} data. These may
+ * be {@link #combine(DoubleStatistics) combined}. For example:
+ *
+ * <pre>
+ * double[][] data = ...
+ * DoubleStatistics.Builder builder = DoubleStatistics.builder(
+ * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
+ * DoubleStatistics stats = Arrays.stream(data)
+ * .parallel()
+ * .map(builder::build)
+ * .reduce(DoubleStatistics::combine)
+ * .get();
+ * </pre>
+ *
+ * <p>The builder can be used to create a {@link java.util.stream.Collector} for repeat
+ * use on multiple data:
+ *
+ * <pre>{@code
+ * DoubleStatistics.Builder builder = DoubleStatistics.builder(
+ * Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
+ * Collector<double[], DoubleStatistics, DoubleStatistics> collector =
+ * Collector.of(builder::build,
+ * (s, d) -> s.combine(builder.build(d)),
+ * DoubleStatistics::combine);
+ *
+ * // Repeated
+ * double[][] data = ...
+ * DoubleStatistics stats = Arrays.stream(data).collect(collector);
+ * }</pre>
+ *
+ * @param statistics Statistics to compute.
+ * @return the builder
+ * @throws IllegalArgumentException if there are no {@code statistics} to compute.
+ */
+ public static Builder builder(Statistic... statistics) {
+ if (statistics.length == 0) {
+ throw new IllegalArgumentException(NO_CONFIGURED_STATISTICS);
+ }
+ final Builder b = new Builder();
+ for (final Statistic s : statistics) {
+ b.add(s);
+ }
+ return b;
+ }
+
+ /**
+ * Updates the state of the statistics to reflect the addition of {@code value}.
+ *
+ * @param value Value.
+ */
+ @Override
+ public void accept(double value) {
+ count++;
+ consumer.accept(value);
+ }
+
+ /**
+ * Return the count of values recorded.
+ *
+ * @return the count of values
+ */
+ public long getCount() {
+ return count;
+ }
+
+ /**
+ * Check if the specified {@code statistic} is supported.
+ *
+ * <p>Note: This method will not return {@code false} if the argument is {@code null}.
+ *
+ * @param statistic Statistic.
+ * @return {@code true} if supported
+ * @throws NullPointerException if the {@code statistic} is {@code null}
+ * @see #get(Statistic)
+ */
+ public boolean isSupported(Statistic statistic) {
+ // Check for the appropriate underlying implementation
+ switch (statistic) {
+ case GEOMETRIC_MEAN:
+ case SUM_OF_LOGS:
+ return sumOfLogs != null;
+ case KURTOSIS:
+ return moment instanceof SumOfFourthDeviations;
+ case MAX:
+ return max != null;
+ case MEAN:
+ return moment != null;
+ case MIN:
+ return min != null;
+ case PRODUCT:
+ return product != null;
+ case SKEWNESS:
+ return moment instanceof SumOfCubedDeviations;
+ case STANDARD_DEVIATION:
+ case VARIANCE:
+ return moment instanceof SumOfSquaredDeviations;
+ case SUM:
+ return sum != null;
+ case SUM_OF_SQUARES:
+ return sumOfSquares != null;
+ default:
+ return false;
+ }
+ }
+
+ /**
+ * Gets the value of the specified {@code statistic}.
+ *
+ * @param statistic Statistic.
+ * @return the double
+ * @throws IllegalArgumentException if the {@code statistic} is not supported
+ * @see #isSupported(Statistic)
+ * @see #getSupplier(Statistic)
+ */
+ public double get(Statistic statistic) {
+ return getSupplier(statistic).getAsDouble();
+ }
+
+ /**
+ * Gets a supplier for the value of the specified {@code statistic}.
+ *
+ * <p>The returned function will supply the correct result after
+ * calls to {@link #accept(double) accept} or
+ * {@link #combine(DoubleStatistics) combine} further values into
+ * {@code this} instance.
+ *
+ * <p>This method can be used to perform a one-time look-up of the statistic
+ * function to compute statistics as values are dynamically added.
+ *
+ * @param statistic Statistic.
+ * @return the supplier
+ * @throws IllegalArgumentException if the {@code statistic} is not supported
+ * @see #isSupported(Statistic)
+ * @see #get(Statistic)
+ */
+ public DoubleSupplier getSupplier(Statistic statistic) {
+ // Locate the implementation.
+ // Statistics that wrap an underlying implementation are created in methods.
+ // The return argument should be a method reference and not an instance
+ // of DoubleStatistic. This ensures the statistic implementation cannot
+ // be updated with new values by casting the result and calling accept(double).
+ DoubleSupplier stat = null;
+ switch (statistic) {
+ case GEOMETRIC_MEAN:
+ stat = getGeometricMean();
+ break;
+ case KURTOSIS:
+ stat = getKurtosis();
+ break;
+ case MAX:
+ stat = max;
+ break;
+ case MEAN:
+ stat = getMean();
+ break;
+ case MIN:
+ stat = min;
+ break;
+ case PRODUCT:
+ stat = product;
+ break;
+ case SKEWNESS:
+ stat = getSkewness();
+ break;
+ case STANDARD_DEVIATION:
+ stat = getStandardDeviation();
+ break;
+ case SUM:
+ stat = sum;
+ break;
+ case SUM_OF_LOGS:
+ stat = sumOfLogs;
+ break;
+ case SUM_OF_SQUARES:
+ stat = sumOfSquares;
+ break;
+ case VARIANCE:
+ stat = getVariance();
+ break;
+ default:
+ break;
+ }
+ if (stat != null) {
+ return stat instanceof DoubleStatistic ?
+ ((DoubleStatistic) stat)::getAsDouble :
+ stat;
+ }
+ throw new IllegalArgumentException(UNSUPPORTED_STATISTIC + statistic);
+ }
+
+ /**
+ * Gets the geometric mean.
+ *
+ * @return a geometric mean supplier (or null if unsupported)
+ */
+ private DoubleSupplier getGeometricMean() {
+ if (sumOfLogs != null) {
+ // Return a function that has access to the count and sumOfLogs
+ return () -> GeometricMean.computeGeometricMean(count, sumOfLogs);
+ }
+ return null;
+ }
+
+ /**
+ * Gets the kurtosis.
+ *
+ * @return a kurtosis supplier (or null if unsupported)
+ */
+ private DoubleSupplier getKurtosis() {
+ if (moment instanceof SumOfFourthDeviations) {
+ return new Kurtosis((SumOfFourthDeviations) moment)::getAsDouble;
+ }
+ return null;
+ }
+
+ /**
+ * Gets the mean.
+ *
+ * @return a mean supplier (or null if unsupported)
+ */
+ private DoubleSupplier getMean() {
+ if (moment != null) {
+ // Special case where wrapping with a Mean is not required
+ return moment::getFirstMoment;
+ }
+ return null;
+ }
+
+ /**
+ * Gets the skewness.
+ *
+ * @return a skewness supplier (or null if unsupported)
+ */
+ private DoubleSupplier getSkewness() {
+ if (moment instanceof SumOfCubedDeviations) {
+ return new Skewness((SumOfCubedDeviations) moment)::getAsDouble;
+ }
+ return null;
+ }
+
+ /**
+ * Gets the standard deviation.
+ *
+ * @return a standard deviation supplier (or null if unsupported)
+ */
+ private DoubleSupplier getStandardDeviation() {
+ if (moment instanceof SumOfSquaredDeviations) {
+ return new StandardDeviation((SumOfSquaredDeviations) moment)::getAsDouble;
+ }
+ return null;
+ }
+
+ /**
+ * Gets the variance.
+ *
+ * @return a variance supplier (or null if unsupported)
+ */
+ private DoubleSupplier getVariance() {
+ if (moment instanceof SumOfSquaredDeviations) {
+ return new Variance((SumOfSquaredDeviations) moment)::getAsDouble;
+ }
+ return null;
+ }
+
+ /**
+ * Combines the state of the {@code other} statistics into this one.
+ * Only {@code this} instance is modified by the {@code combine} operation.
+ *
+ * <p>The {@code other} instance must be <em>compatible</em>. This is {@code true} if the
+ * {@code other} instance returns {@code true} for {@link #isSupported(Statistic)} for
+ * all values of the {@link Statistic} enum which are supported by {@code this}
+ * instance.
+ *
+ * <p>Note that this operation is <em>not symmetric</em>. It may be possible to perform
+ * {@code a.combine(b)} but not {@code b.combine(a)}. In the event that the {@code other}
+ * instance is not compatible then an exception is raised before any state is modified.
+ *
+ * @param other Another set of statistics to be combined.
+ * @return {@code this} instance after combining {@code other}.
+ * @throws IllegalArgumentException if the {@code other} is not compatible
+ */
+ public DoubleStatistics combine(DoubleStatistics other) {
+ // Check compatibility
+ checkNullOrElseOtherNonNull(min, other.min);
+ checkNullOrElseOtherNonNull(max, other.max);
+ checkNullOrElseOtherNonNull(sum, other.sum);
+ checkNullOrElseOtherNonNull(product, other.product);
+ checkNullOrElseOtherNonNull(sumOfSquares, other.sumOfSquares);
+ checkNullOrElseOtherNonNull(sumOfLogs, other.sumOfLogs);
+ checkNullOrElseOtherIsAssignable(moment, other.moment);
+ // Combine
+ count += other.count;
+ combine(min, other.min);
+ combine(max, other.max);
+ combine(sum, other.sum);
+ combine(product, other.product);
+ combine(sumOfSquares, other.sumOfSquares);
+ combine(sumOfLogs, other.sumOfLogs);
+ combineMoment(moment, other.moment);
+ return this;
+ }
+
+ /**
+ * Check left-hand side argument {@code a} is {@code null} or else the right-hand side
+ * argument {@code b} must also be non-{@code null}.
+ *
+ * @param a LHS.
+ * @param b RHS.
+ */
+ private static void checkNullOrElseOtherNonNull(Object a, Object b) {
+ if (a != null && b == null) {
+ throw new IllegalArgumentException(INCOMPATIBLE_STATISTICS);
+ }
+ }
+
+ /**
+ * Check left-hand side argument {@code a} is {@code null} or else the right-hand side
+ * argument {@code b} must be run-time assignable to the same class as {@code a}.
+ *
+ * @param a LHS.
+ * @param b RHS.
+ */
+ private static void checkNullOrElseOtherIsAssignable(Object a, Object b) {
+ if (a != null && (b == null || !a.getClass().isAssignableFrom(b.getClass()))) {
+ throw new IllegalArgumentException(INCOMPATIBLE_STATISTICS);
+ }
+ }
+
+ /**
+ * If the left-hand side argument {@code a} is non-{@code null}, combined it with the
+ * right-hand side argument {@code b}.
+ *
+ * @param <T> {@link DoubleStatistic} being accumulated.
+ * @param a LHS.
+ * @param b RHS.
+ */
+ private static <T extends DoubleStatistic & DoubleStatisticAccumulator<T>> void combine(T a, T b) {
+ if (a != null) {
+ a.combine(b);
+ }
+ }
+
+ /**
+ * If the left-hand side argument {@code a} is non-{@code null}, combined it with the
+ * right-hand side argument {@code b}. Assumes that the RHS is run-time assignable
+ * to the same class as LHS.
+ *
+ * @param a LHS.
+ * @param b RHS.
+ */
+ private static void combineMoment(FirstMoment a, FirstMoment b) {
+ // Avoid reflection and use the simpler instanceof
+ if (a instanceof SumOfFourthDeviations) {
+ ((SumOfFourthDeviations) a).combine((SumOfFourthDeviations) b);
+ } else if (a instanceof SumOfCubedDeviations) {
+ ((SumOfCubedDeviations) a).combine((SumOfCubedDeviations) b);
+ } else if (a instanceof SumOfSquaredDeviations) {
+ ((SumOfSquaredDeviations) a).combine((SumOfSquaredDeviations) b);
+ } else if (a != null) {
+ a.combine(b);
+ }
+ }
+}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/GeometricMean.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/GeometricMean.java
index 162c9df..f550f8f 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/GeometricMean.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/GeometricMean.java
@@ -117,9 +117,7 @@ public final class GeometricMean implements DoubleStatistic, DoubleStatisticAccu
*/
@Override
public double getAsDouble() {
- return n == 0 ?
- Double.NaN :
- Math.exp(sumOfLogs.getAsDouble() / n);
+ return computeGeometricMean(n, sumOfLogs);
}
@Override
@@ -128,4 +126,17 @@ public final class GeometricMean implements DoubleStatistic, DoubleStatisticAccu
sumOfLogs.combine(other.sumOfLogs);
return this;
}
+
+ /**
+ * Compute the geometric mean.
+ *
+ * @param n Count of values.
+ * @param sumOfLogs Sum of logs.
+ * @return the geometric mean
+ */
+ static double computeGeometricMean(long n, SumOfLogs sumOfLogs) {
+ return n == 0 ?
+ Double.NaN :
+ Math.exp(sumOfLogs.getAsDouble() / n);
+ }
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Kurtosis.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Kurtosis.java
index 61e87e0..12eb179 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Kurtosis.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Kurtosis.java
@@ -83,7 +83,7 @@ public final class Kurtosis implements DoubleStatistic, DoubleStatisticAccumulat
*
* @param sq Sum of fourth deviations.
*/
- private Kurtosis(SumOfFourthDeviations sq) {
+ Kurtosis(SumOfFourthDeviations sq) {
this.sq = sq;
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
index 626c526..f35c07c 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Mean.java
@@ -90,7 +90,7 @@ public final class Mean implements DoubleStatistic, DoubleStatisticAccumulator<M
*
* @param m1 First moment.
*/
- private Mean(FirstMoment m1) {
+ Mean(FirstMoment m1) {
firstMoment = m1;
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Skewness.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Skewness.java
index 1c24771..305382f 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Skewness.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Skewness.java
@@ -84,7 +84,7 @@ public final class Skewness implements DoubleStatistic, DoubleStatisticAccumulat
*
* @param sc Sum of cubed deviations.
*/
- private Skewness(SumOfCubedDeviations sc) {
+ Skewness(SumOfCubedDeviations sc) {
this.sc = sc;
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
index 71de8d7..870d1b1 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/StandardDeviation.java
@@ -95,7 +95,7 @@ public final class StandardDeviation implements DoubleStatistic, DoubleStatistic
*
* @param ss Sum of squared deviations.
*/
- private StandardDeviation(SumOfSquaredDeviations ss) {
+ StandardDeviation(SumOfSquaredDeviations ss) {
this.ss = ss;
}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistic.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistic.java
new file mode 100644
index 0000000..fe58d4a
--- /dev/null
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Statistic.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.statistics.descriptive;
+
+/**
+ * A statistic that can be computed on univariate data, for example a stream of
+ * {@code double} values.
+ *
+ * <p>{@code Statistic} is an enum representing the statistics that can be computed
+ * by implementations in the {@code org.apache.commons.statistics.descriptive} package.
+ *
+ * <p><strong>Note</strong>
+ *
+ * <p>Implementations may provide additional parameters to control the computation of
+ * the statistic, for example to compute the population (biased) or sample (unbiased) variance.
+ *
+ * @since 1.1
+ */
+public enum Statistic {
+ /** Minimum. */
+ MIN,
+ /** Maximum. */
+ MAX,
+ /** Mean, or average. */
+ MEAN,
+ /** Standard deviation. */
+ STANDARD_DEVIATION,
+ /** Variance. */
+ VARIANCE,
+ /** Skewness. */
+ SKEWNESS,
+ /** Kurtosis. */
+ KURTOSIS,
+ /** Product. */
+ PRODUCT,
+ /** Sum. */
+ SUM,
+ /** Sum of the natural logarithm of values. */
+ SUM_OF_LOGS,
+ /** Sum of the squared values. */
+ SUM_OF_SQUARES,
+ /** Geometric mean. */
+ GEOMETRIC_MEAN
+}
diff --git a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
index 08f03e2..6162275 100644
--- a/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
+++ b/commons-statistics-descriptive/src/main/java/org/apache/commons/statistics/descriptive/Variance.java
@@ -95,7 +95,7 @@ public final class Variance implements DoubleStatistic, DoubleStatisticAccumulat
*
* @param ss Sum of squared deviations.
*/
- private Variance(SumOfSquaredDeviations ss) {
+ Variance(SumOfSquaredDeviations ss) {
this.ss = ss;
}
diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/DoubleStatisticsTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/DoubleStatisticsTest.java
new file mode 100644
index 0000000..89962a9
--- /dev/null
+++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/DoubleStatisticsTest.java
@@ -0,0 +1,495 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.statistics.descriptive;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.EnumMap;
+import java.util.EnumSet;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.function.DoubleConsumer;
+import java.util.function.DoubleSupplier;
+import java.util.function.Function;
+import java.util.function.Supplier;
+import java.util.function.ToDoubleFunction;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+/**
+ * Test for {@link DoubleStatistics}.
+ *
+ * <p>This class verifies that the statistics computed using the summary
+ * class are an exact match to the statistics computed individually.
+ */
+class DoubleStatisticsTest {
+ /** Empty statistic array. */
+ private static final Statistic[] EMPTY_STATISTIC_ARRAY = {};
+
+ /** The test data. */
+ private static List<TestData> testData;
+
+ /** The expected result for each statistic on the test data. */
+ private static EnumMap<Statistic, List<ExpectedResult>> expectedResult;
+
+ /** The statistics that are co-computed. */
+ private static EnumMap<Statistic, EnumSet<Statistic>> coComputed;
+
+ /**
+ * Container for test data: an identifier and one or more arrays of sample values.
+ */
+ private static class TestData {
+ /** Identifier. */
+ private final int id;
+ /** The sample values. */
+ private final double[][] values;
+ /** The number of values (the sum of the lengths of all the sample arrays). */
+ private final long size;
+
+ /**
+ * Stores the arrays by reference (no copy is made) and caches the total number of values.
+ *
+ * @param id Identifier.
+ * @param values Sample values.
+ */
+ TestData(int id, double[]... values) {
+ this.id = id;
+ this.values = values;
+ size = Arrays.stream(values).mapToLong(x -> x.length).sum();
+ }
+
+ /**
+ * @return the identifier
+ */
+ int getId() {
+ return id;
+ }
+
+ /**
+ * @return the values as a stream (one element per sample array)
+ */
+ Stream<double[]> stream() {
+ return Arrays.stream(values);
+ }
+
+ /**
+ * @return the number of values (summed over all the sample arrays)
+ */
+ long size() {
+ return size;
+ }
+ }
+
+ /**
+ * Container for expected results: the two reference values of one statistic
+ * on one test data set.
+ */
+ private static class ExpectedResult {
+ /** The expected result for a stream of values (read by {@code testStream}). */
+ private final double stream;
+ /** The expected result for an array of values (read by {@code testArray}). */
+ private final double array;
+
+ /**
+ * @param stream Stream result.
+ * @param array Array result.
+ */
+ ExpectedResult(double stream, double array) {
+ this.stream = stream;
+ this.array = array;
+ }
+
+ /**
+ * @return the expected result for a stream of values.
+ */
+ double getStream() {
+ return stream;
+ }
+
+ /**
+ * @return the expected result for an array of values.
+ */
+ double getArray() {
+ return array;
+ }
+ }
+
+ @BeforeAll
+ static void setup() {
+ // Random double[] of different lengths (4, 5 and 7 values, each in [2.25, 3.75))
+ final double[][] arrays = IntStream.of(4, 5, 7)
+ .mapToObj(i -> ThreadLocalRandom.current().doubles(i, 2.25, \
3.75).toArray()) + .toArray(double[][]::new);
+ // Create test data. IDs must be created in order of the data: the ID is used as the index into each list of expected results.
+ testData = new ArrayList<>();
+ testData.add(new TestData(testData.size(), new double[0]));
+ testData.add(new TestData(testData.size(), arrays[0]));
+ testData.add(new TestData(testData.size(), arrays[1]));
+ testData.add(new TestData(testData.size(), arrays[2]));
+ testData.add(new TestData(testData.size(), arrays[0], arrays[1]));
+ testData.add(new TestData(testData.size(), arrays[1], arrays[2]));
+ // Create reference expected results (one list per statistic, one entry per test data)
+ expectedResult = new EnumMap<>(Statistic.class);
+ addExpected(Statistic.MIN, Min::create, Min::of);
+ addExpected(Statistic.MAX, Max::create, Max::of);
+ addExpected(Statistic.MEAN, Mean::create, Mean::of);
+ addExpected(Statistic.STANDARD_DEVIATION, StandardDeviation::create, \
StandardDeviation::of); + addExpected(Statistic.VARIANCE, Variance::create, \
Variance::of); + addExpected(Statistic.SKEWNESS, Skewness::create, \
Skewness::of); + addExpected(Statistic.KURTOSIS, Kurtosis::create, \
Kurtosis::of); + addExpected(Statistic.PRODUCT, Product::create, Product::of);
+ addExpected(Statistic.SUM, Sum::create, Sum::of);
+ addExpected(Statistic.SUM_OF_LOGS, SumOfLogs::create, SumOfLogs::of);
+ addExpected(Statistic.SUM_OF_SQUARES, SumOfSquares::create, \
SumOfSquares::of); + addExpected(Statistic.GEOMETRIC_MEAN, \
GeometricMean::create, GeometricMean::of); + // Create co-computed statistics (each statistic starts with itself)
+ coComputed = new EnumMap<>(Statistic.class);
+ Arrays.stream(Statistic.values()).forEach(s -> coComputed.put(s, \
EnumSet.of(s))); + addCoComputed(Statistic.GEOMETRIC_MEAN, \
Statistic.SUM_OF_LOGS); + addCoComputed(Statistic.VARIANCE, \
Statistic.STANDARD_DEVIATION); + // Cascade moments up: MEAN into STANDARD_DEVIATION and VARIANCE, then the VARIANCE set into SKEWNESS, then the SKEWNESS set into KURTOSIS
+ EnumSet<Statistic> m = coComputed.get(Statistic.MEAN);
+ coComputed.get(Statistic.STANDARD_DEVIATION).addAll(m);
+ coComputed.get(Statistic.VARIANCE).addAll(m);
+ m = coComputed.get(Statistic.VARIANCE);
+ coComputed.get(Statistic.SKEWNESS).addAll(m);
+ m = coComputed.get(Statistic.SKEWNESS);
+ coComputed.get(Statistic.KURTOSIS).addAll(m);
+ }
+
+ /**
+ * Adds the expected result for the specified {@code statistic}.
+ *
+ * <p>Two reference values are computed for each test data set: one where each array is
+ * passed with an empty statistic to {@code Statistics.add}, and one where the statistic
+ * is created from each array. In both cases the per-array statistics are reduced with
+ * {@code combine}. For non-empty data both values are asserted to be finite.
+ *
+ * @param <T> {@link DoubleStatistic} being computed.
+ * @param statistic Statistic.
+ * @param constructor Constructor for an empty object.
+ * @param arrayConstructor Constructor using an array of values.
+ */
+ private static <T extends DoubleStatistic & DoubleStatisticAccumulator<T>> void \
addExpected(Statistic statistic, + Supplier<T> constructor, \
Function<double[], T> arrayConstructor) { + final List<ExpectedResult> results \
= new ArrayList<>(); + for (final TestData d : testData) {
+ // Stream values
+ final double e1 = d.stream()
+ .map(values -> Statistics.add(constructor.get(), values))
+ .reduce(DoubleStatisticAccumulator::combine)
+ .orElseThrow(IllegalStateException::new)
+ .getAsDouble();
+ // Create from array
+ final double e2 = d.stream()
+ .map(arrayConstructor)
+ .reduce(DoubleStatisticAccumulator::combine)
+ .orElseThrow(IllegalStateException::new)
+ .getAsDouble();
+ // Check that there is a finite value to compute during testing
+ if (d.size() != 0) {
+ assertFinite(e1, statistic);
+ assertFinite(e2, statistic);
+ }
+ results.add(new ExpectedResult(e1, e2));
+ }
+ expectedResult.put(statistic, results);
+ }
+
+ /**
+ * Adds the co-computed statistics to the co-computed mapping.
+ * The statistics must be co-computed (computing either one will compute the other)
+ * and not a one-way relationship (a computes b but b does not compute a).
+ *
+ * @param s1 First statistic.
+ * @param s2 Second statistic.
+ */
+ private static void addCoComputed(Statistic s1, Statistic s2) {
+ coComputed.get(s1).add(s2);
+ coComputed.get(s2).add(s1);
+ }
+
+ @AfterAll
+ static void teardown() {
+ // Free memory: drop the references to the static fixtures once all tests have run
+ testData = null;
+ expectedResult = null;
+ coComputed = null;
+ }
+
+ static Stream<Arguments> streamTestData() {
+ final Stream.Builder<Arguments> builder = Stream.builder();
+ final Statistic[] statistics = Statistic.values();
+ for (int i = 0; i < statistics.length; i++) {
+ // Single statistics: one set of arguments per test data
+ final EnumSet<Statistic> s1 = EnumSet.of(statistics[i]);
+ testData.stream().forEach(d -> builder.add(Arguments.of(s1, d)));
+ // Paired statistics: each unordered pair (i < j), one set of arguments per test data
+ for (int j = i + 1; j < statistics.length; j++) {
+ final EnumSet<Statistic> s2 = EnumSet.of(statistics[i], \
statistics[j]); + testData.stream().forEach(d -> \
builder.add(Arguments.of(s2, d))); + }
+ }
+ return builder.build();
+ }
+
+ /**
+ * Test the {@link DoubleStatistics} when all data is passed as a stream of single values.
+ *
+ * @param stats Statistics to compute.
+ * @param data Test data.
+ */
+ @ParameterizedTest
+ @MethodSource(value = {"streamTestData"})
+ void testStream(EnumSet<Statistic> stats, TestData data) {
+ // Test creation from specified statistics
+ final Statistic[] statistics = stats.toArray(EMPTY_STATISTIC_ARRAY);
+ assertStatistics(stats, data, x -> acceptAll(statistics, x), \
ExpectedResult::getStream); + }
+
+ /**
+ * Test the {@link DoubleStatistics} when data is passed as a {@code double[]} of values.
+ *
+ * @param stats Statistics to compute.
+ * @param data Test data.
+ */
+ @ParameterizedTest
+ @MethodSource(value = {"streamTestData"})
+ void testArray(EnumSet<Statistic> stats, TestData data) {
+ // Test creation from a builder
+ final DoubleStatistics.Builder builder = \
DoubleStatistics.builder(stats.toArray(EMPTY_STATISTIC_ARRAY)); + \
assertStatistics(stats, data, builder::build, ExpectedResult::getArray); + }
+
+ /**
+ * Asserts the count, the supported statistics and the values of an aggregator built from the data.
+ *
+ * <p>One {@link DoubleStatistics} is created from each array of the data using the
+ * {@code constructor}; these are reduced to a single instance using
+ * {@link #combine(Statistic[], DoubleStatistics, DoubleStatistics)}.
+ *
+ * @param stats Statistics requested.
+ * @param data Test data.
+ * @param constructor Creates the aggregator from an array of values.
+ * @param expected Selects the reference value (stream or array) from the expected result.
+ */
+ private static void assertStatistics(EnumSet<Statistic> stats, TestData data,
+ Function<double[], DoubleStatistics> constructor,
+ ToDoubleFunction<ExpectedResult> expected) {
+ final Statistic[] statsArray = stats.toArray(EMPTY_STATISTIC_ARRAY);
+ final DoubleStatistics statistics = data.stream()
+ .map(constructor)
+ .reduce((a, b) -> combine(statsArray, a, b))
+ .orElseThrow(IllegalStateException::new);
+ final int id = data.getId();
+ Assertions.assertEquals(data.size(), statistics.getCount(), "Count");
+ final EnumSet<Statistic> computed = EnumSet.copyOf(stats);
+ stats.forEach(s -> computed.addAll(coComputed.get(s)));
+
+ // Test if the statistics are correctly identified as supported (requested or co-computed only)
+ EnumSet.allOf(Statistic.class).forEach(s ->
+ Assertions.assertEquals(computed.contains(s), statistics.isSupported(s),
+ () -> stats + " isSupported -> " + s.toString()));
+
+ // Test the values (exact equality with the reference) of all computed statistics
+ computed.forEach(s ->
+ Assertions.assertEquals(expected.applyAsDouble(expectedResult.get(s).get(id)), \
statistics.get(s), + () -> stats + " value -> " + s.toString()));
+ }
+
+ /**
+ * Add all the {@code values} to an aggregator of the {@code statistics}.
+ *
+ * <p>This method verifies that the {@link DoubleStatistics#get(Statistic)} and
+ * {@link DoubleStatistics#getSupplier(Statistic)} methods return the same
+ * result as values are added. The suppliers are obtained once, before any
+ * value is added, and are checked after each value.
+ *
+ * @param statistics Statistics to compute.
+ * @param values Values.
+ * @return the statistics
+ */
+ private static DoubleStatistics acceptAll(Statistic[] statistics, double[] \
values) { + final DoubleStatistics stats = DoubleStatistics.of(statistics);
+ final DoubleSupplier[] f = getSuppliers(statistics, stats);
+ for (final double x : values) {
+ stats.accept(x);
+ for (int i = 0; i < statistics.length; i++) {
+ final Statistic s = statistics[i];
+ Assertions.assertEquals(stats.get(s), f[i].getAsDouble(),
+ () -> "Supplier(" + s + ") after value " + x);
+ }
+ }
+ return stats;
+ }
+
+ /**
+ * Gets the suppliers for the {@code statistics}.
+ *
+ * <p>Each supplier is asserted not to be an instance of {@link DoubleStatistic}.
+ *
+ * @param statistics Statistics to compute.
+ * @param stats Statistic aggregator.
+ * @return the suppliers (in the order of the {@code statistics})
+ */
+ private static DoubleSupplier[] getSuppliers(Statistic[] statistics, final \
DoubleStatistics stats) { + final DoubleSupplier[] f = new \
DoubleSupplier[statistics.length]; + for (int i = 0; i < statistics.length; \
i++) { + final DoubleSupplier supplier = stats.getSupplier(statistics[i]);
+ Assertions.assertFalse(supplier instanceof DoubleStatistic,
+ () -> "DoubleStatistic instance: " + \
supplier.getClass().getSimpleName()); + f[i] = supplier;
+ }
+ return f;
+ }
+
+ /**
+ * Combine the two statistic aggregators.
+ *
+ * <p>This method verifies that the {@link DoubleStatistics#get(Statistic)} and
+ * {@link DoubleStatistics#getSupplier(Statistic)} methods return the same
+ * result after the {@link DoubleStatistics#combine(DoubleStatistics)}.
+ * The suppliers are obtained from {@code s1} before the combine.
+ *
+ * @param statistics Statistics to compute.
+ * @param s1 Statistic aggregator (receives the combination).
+ * @param s2 Statistic aggregator combined into {@code s1}.
+ * @return the double statistics ({@code s1})
+ */
+ private static DoubleStatistics combine(Statistic[] statistics,
+ DoubleStatistics s1, DoubleStatistics s2) {
+ final DoubleSupplier[] f = getSuppliers(statistics, s1);
+ s1.combine(s2);
+ for (int i = 0; i < statistics.length; i++) {
+ final Statistic s = statistics[i];
+ Assertions.assertEquals(s1.get(s), f[i].getAsDouble(),
+ () -> "Supplier(" + s + ") after combine");
+ }
+ return s1;
+ }
+
+ @Test
+ void testNoOpConsumer() {
+ final DoubleConsumer c = DoubleStatistics.NOOP;
+ // Hit coverage of accept; the rest of the test checks that NOOP.andThen(other) returns 'other' itself
+ c.accept(0);
+ final double[] value = {0};
+ final DoubleConsumer other = x -> value[0] = x;
+ final DoubleConsumer combined = c.andThen(other);
+ Assertions.assertSame(combined, other);
+ final double y = 42;
+ combined.accept(y);
+ Assertions.assertEquals(y, value[0]);
+ }
+
+ /**
+ * Test that {@code DoubleStatistics.of} throws an {@link IllegalArgumentException} when
+ * given no statistics (no arguments or an empty array) and a {@link NullPointerException}
+ * when the array contains a {@code null} statistic.
+ */
+ @Test
+ void testOfThrows() {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> \
DoubleStatistics.of()); + \
Assertions.assertThrows(IllegalArgumentException.class, () -> \
DoubleStatistics.of(EMPTY_STATISTIC_ARRAY)); + \
Assertions.assertThrows(NullPointerException.class, () -> DoubleStatistics.of(new \
Statistic[1])); + }
+
+ /**
+ * Test that {@code DoubleStatistics.of} with a set of statistics throws an
+ * {@link IllegalArgumentException} for an empty set, and a {@link NullPointerException}
+ * for a {@code null} set or a {@code null} second argument (the values).
+ */
+ @Test
+ void testOfSetThrows() {
+ final EnumSet<Statistic> s1 = EnumSet.noneOf(Statistic.class);
+ Assertions.assertThrows(IllegalArgumentException.class, () -> \
DoubleStatistics.of(s1)); + final EnumSet<Statistic> s2 = null;
+ Assertions.assertThrows(NullPointerException.class, () -> \
DoubleStatistics.of(s2)); + final EnumSet<Statistic> s3 = \
EnumSet.of(Statistic.MIN); + \
Assertions.assertThrows(NullPointerException.class, () -> DoubleStatistics.of(s3, \
null)); + }
+
+ /**
+ * Test that {@code DoubleStatistics.builder} throws an {@link IllegalArgumentException} when
+ * given no statistics (no arguments or an empty array) and a {@link NullPointerException}
+ * when the array contains a {@code null} statistic.
+ */
+ @Test
+ void testBuilderThrows() {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> \
DoubleStatistics.builder()); + \
Assertions.assertThrows(IllegalArgumentException.class, () -> \
DoubleStatistics.builder(EMPTY_STATISTIC_ARRAY)); + \
Assertions.assertThrows(NullPointerException.class, () -> \
DoubleStatistics.builder(new Statistic[1])); + }
+
+ /**
+ * Test that {@code isSupported(null)} throws a {@link NullPointerException}.
+ */
+ @Test
+ void testIsSupportedWithNull() {
+ DoubleStatistics s = DoubleStatistics.of(Statistic.MIN);
+ Assertions.assertThrows(NullPointerException.class, () -> \
s.isSupported(null)); + }
+
+ /**
+ * Test that an aggregator created for a single statistic supports only that statistic:
+ * {@code get} and {@code getSupplier} work for it and throw an
+ * {@link IllegalArgumentException} for every other statistic.
+ *
+ * @param stat Statistic used to create the aggregator.
+ */
+ @ParameterizedTest
+ @MethodSource
+ void testNotSupported(Statistic stat) {
+ DoubleStatistics statistics = DoubleStatistics.of(stat);
+ for (final Statistic s : Statistic.values()) {
+ Assertions.assertEquals(s == stat, statistics.isSupported(s),
+ () -> stat + " isSupported -> " + s.toString());
+ if (s == stat) {
+ Assertions.assertDoesNotThrow(() -> statistics.get(s),
+ () -> stat + " get -> " + s.toString());
+ Assertions.assertNotNull(statistics.getSupplier(s),
+ () -> stat + " getSupplier -> " + s.toString());
+ } else {
+ Assertions.assertThrows(IllegalArgumentException.class, () -> \
statistics.get(s), + () -> stat + " get -> " + s.toString());
+ Assertions.assertThrows(IllegalArgumentException.class, () -> \
statistics.getSupplier(s), + () -> stat + " getSupplier -> " + \
s.toString()); + }
+ }
+ }
+
+ /**
+ * Factory for {@code testNotSupported(Statistic)}. In the co-computed mapping created
+ * by {@code setup()} neither of these statistics is paired with another statistic.
+ *
+ * @return the statistics
+ */
+ static Statistic[] testNotSupported() {
+ return new Statistic[] {Statistic.MIN, Statistic.PRODUCT};
+ }
+
+ @ParameterizedTest
+ @MethodSource
+ void testIncompatibleCombineThrows(EnumSet<Statistic> stat1, EnumSet<Statistic> \
stat2) { + final double[] v1 = {1, 2, 3.5, 6};
+ final double[] v2 = {3, 4, 5};
+ DoubleStatistics statistics = DoubleStatistics.of(stat1, v1);
+ DoubleStatistics other = DoubleStatistics.of(stat2, v2);
+ // Store values of the stat1 statistics before the combine is attempted
+ final double[] values = \
stat1.stream().mapToDouble(statistics::get).toArray(); + \
Assertions.assertThrows(IllegalArgumentException.class, () -> \
statistics.combine(other), + () -> stat1 + " " + stat2);
+ // Values should be unchanged: the rejected combine must not modify the aggregator
+ final int[] i = {0};
+ stat1.stream().forEach(
+ s -> Assertions.assertEquals(values[i[0]++], statistics.get(s), () -> s \
+ " changed")); + }
+
+ static Stream<Arguments> testIncompatibleCombineThrows() {
+ return Stream.of(
+ Arguments.of(EnumSet.of(Statistic.MIN), EnumSet.of(Statistic.PRODUCT)),
+ Arguments.of(EnumSet.of(Statistic.VARIANCE), EnumSet.of(Statistic.MIN)),
+ // Note: MEAN is compatible with VARIANCE (a MEAN aggregator can combine a VARIANCE aggregator) but not the reverse. The combine is not symmetric.
+ Arguments.of(EnumSet.of(Statistic.VARIANCE), EnumSet.of(Statistic.MEAN))
+ );
+ }
+
+ @ParameterizedTest
+ @MethodSource
+ void testCompatibleCombine(EnumSet<Statistic> stat1, EnumSet<Statistic> stat2) {
+ final double[] v1 = {1, 2, 3.5, 6};
+ final double[] v2 = {3, 4, 5};
+ final DoubleStatistics statistics1 = DoubleStatistics.of(stat1, v1);
+ final DoubleStatistics statistics2 = DoubleStatistics.of(stat1, v1);
+ // Note: other1 is intentionally using the same flags as statistics1 (it provides the reference result)
+ final DoubleStatistics other1 = DoubleStatistics.of(stat1, v2);
+ final DoubleStatistics other2 = DoubleStatistics.of(stat2, v2);
+ // This should work: both aggregators were created with the same statistics
+ statistics1.combine(other1);
+ // This should be compatible: other2 was created with different statistics (stat2)
+ statistics2.combine(other2);
+ // The stats requested by stat1 should be the same after either combine
+ for (final Statistic s : stat1) {
+ final double expected = statistics1.get(s);
+ assertFinite(expected, s);
+ Assertions.assertEquals(expected, statistics2.get(s), () -> \
s.toString()); + }
+ }
+
+ static Stream<Arguments> testCompatibleCombine() {
+ return Stream.of(
+ Arguments.of(EnumSet.of(Statistic.MEAN), \
EnumSet.of(Statistic.VARIANCE)), + \
Arguments.of(EnumSet.of(Statistic.VARIANCE), EnumSet.of(Statistic.SKEWNESS)), + \
Arguments.of(EnumSet.of(Statistic.VARIANCE), EnumSet.of(Statistic.KURTOSIS)), + \
Arguments.of(EnumSet.of(Statistic.GEOMETRIC_MEAN), \
EnumSet.of(Statistic.SUM_OF_LOGS)), + // Compatible combinations of several statistics
+ Arguments.of(EnumSet.of(Statistic.VARIANCE, Statistic.MIN, \
Statistic.SKEWNESS), + EnumSet.of(Statistic.KURTOSIS, \
Statistic.MEAN, Statistic.MIN)) + );
+ }
+
+ /**
+ * Asserts the value is finite (not infinite and not NaN).
+ *
+ * @param value Value.
+ * @param s Statistic the value belongs to (used in the failure message).
+ */
+ private static void assertFinite(double value, Statistic s) {
+ Assertions.assertTrue(Double.isFinite(value), () -> s.toString() + " \
isFinite"); + }
+}
diff --git a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/UserGuideTest.java b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/UserGuideTest.java
index 6d8c528..daeba31 100644
--- a/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/UserGuideTest.java
+++ b/commons-statistics-descriptive/src/test/java/org/apache/commons/statistics/descriptive/UserGuideTest.java
@@ -17,6 +17,11 @@
package org.apache.commons.statistics.descriptive;
+import java.util.Arrays;
+import java.util.EnumSet;
+import java.util.function.DoubleSupplier;
+import java.util.stream.Collector;
+import java.util.stream.IntStream;
import java.util.stream.Stream;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -43,4 +48,92 @@ class UserGuideTest {
// np.var([3, 3, 5, 4], ddof=1)
Assertions.assertEquals(0.9166666666666666, v2);
}
+
+ @Test
+ void testDoubleStatistics1() {
+ double[] data = {1, 2, 3, 4, 5, 6, 7, 8};
+ DoubleStatistics stats = DoubleStatistics.builder(
+ Statistic.MIN, Statistic.MAX, Statistic.VARIANCE)
+ .build(data);
+ Assertions.assertEquals(1, stats.get(Statistic.MIN));
+ Assertions.assertEquals(8, stats.get(Statistic.MAX));
+ // Reference values from Python numpy 1.24.4 (ddof=1 divides by n - 1):
+ // np.var(np.arange(1, 9), ddof=1)
+ // np.std(np.arange(1, 9), ddof=1)
+ Assertions.assertEquals(6.0, stats.get(Statistic.VARIANCE), 1e-10);
+ // Get other statistics supported by the underlying computations (these were not passed to the builder)
+ Assertions.assertEquals(2.449489742783178, \
stats.get(Statistic.STANDARD_DEVIATION), 1e-10); + \
Assertions.assertEquals(4.5, stats.get(Statistic.MEAN), 1e-10); + }
+
+ @Test
+ void testDoubleStatistics2() {
+ double[][] data = {
+ {1, 2, 3, 4},
+ {5, 6, 7, 8},
+ };
+ DoubleStatistics.Builder builder = DoubleStatistics.builder(
+ Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
+ DoubleStatistics stats = Arrays.stream(data)
+ .map(builder::build)
+ .reduce(DoubleStatistics::combine)
+ .get();
+ Assertions.assertEquals(1, stats.get(Statistic.MIN));
+ Assertions.assertEquals(8, stats.get(Statistic.MAX));
+ Assertions.assertEquals(6.0, stats.get(Statistic.VARIANCE), 1e-10);
+ // Get other statistics supported by the underlying computations (these were not passed to the builder)
+ Assertions.assertEquals(2.449489742783178, \
stats.get(Statistic.STANDARD_DEVIATION), 1e-10); + \
Assertions.assertEquals(4.5, stats.get(Statistic.MEAN), 1e-10); + }
+
+ /**
+ * Example of collecting arrays into a single {@link DoubleStatistics} using a
+ * {@link Collector}: the builder supplies the container, each array is built into an
+ * aggregator that is combined into the container, and containers are merged with combine.
+ */
+ @Test
+ void testDoubleStatistics3() {
+ double[][] data = {
+ {1, 2, 3, 4},
+ {5, 6, 7, 8},
+ };
+ DoubleStatistics.Builder builder = DoubleStatistics.builder(
+ Statistic.MIN, Statistic.MAX, Statistic.VARIANCE);
+ Collector<double[], DoubleStatistics, DoubleStatistics> collector =
+ Collector.of(builder::build, (s, d) -> s.combine(builder.build(d)), \
DoubleStatistics::combine); + DoubleStatistics stats = \
Arrays.stream(data).collect(collector); + Assertions.assertEquals(1, \
stats.get(Statistic.MIN)); + Assertions.assertEquals(8, \
stats.get(Statistic.MAX)); + Assertions.assertEquals(6.0, \
stats.get(Statistic.VARIANCE), 1e-10); + // Get other statistics supported by \
the underlying computations + Assertions.assertEquals(2.449489742783178, \
stats.get(Statistic.STANDARD_DEVIATION), 1e-10); + \
Assertions.assertEquals(4.5, stats.get(Statistic.MEAN), 1e-10); + }
+
+ /**
+ * Example showing that combine is not symmetric: a VARIANCE aggregator rejects a MEAN
+ * aggregator with an {@link IllegalArgumentException}, but a MEAN aggregator can
+ * combine a VARIANCE aggregator.
+ */
+ @Test
+ void testDoubleStatistics4() {
+ double[] data = {1, 2, 3, 4, 5, 6, 7, 8};
+ DoubleStatistics varStats = \
DoubleStatistics.builder(Statistic.VARIANCE).build(data); + DoubleStatistics \
meanStats = DoubleStatistics.builder(Statistic.MEAN).build(data); + \
Assertions.assertThrows(IllegalArgumentException.class, () -> \
varStats.combine(meanStats)); + Assertions.assertDoesNotThrow(() -> \
meanStats.combine(varStats)); + }
+
+ /**
+ * Example of creating a {@link DoubleStatistics} from a set of statistics and
+ * a list of values.
+ */
+ @Test
+ void testDoubleStatistics5() {
+ DoubleStatistics stats = DoubleStatistics.of(
+ EnumSet.of(Statistic.MIN, Statistic.MAX),
+ 1, 1, 2, 3, 5, 8, 13);
+ Assertions.assertEquals(1, stats.get(Statistic.MIN));
+ Assertions.assertEquals(13, stats.get(Statistic.MAX));
+ }
+
+ @Test
+ void testDoubleStatistics6() {
+ DoubleStatistics stats = DoubleStatistics.of(Statistic.MEAN, Statistic.MAX);
+ DoubleSupplier mean = stats.getSupplier(Statistic.MEAN);
+ DoubleSupplier max = stats.getSupplier(Statistic.MAX);
+ IntStream.rangeClosed(1, 5).forEach(x -> {
+ stats.accept(x);
+ Assertions.assertEquals((x + 1.0) / 2, mean.getAsDouble(), "mean");
+ Assertions.assertEquals(x, max.getAsDouble(), "max");
+ // Example print (left as a comment; the suppliers are asserted above after each added value)
+ // printf("[1 .. %d] mean=%.1f, max=%s%n", x, mean.getAsDouble(), \
max.getAsDouble()); + });
+ }
}
diff --git a/src/conf/checkstyle/checkstyle-suppressions.xml b/src/conf/checkstyle/checkstyle-suppressions.xml
index c00acf9..7086dac 100644
--- a/src/conf/checkstyle/checkstyle-suppressions.xml
+++ b/src/conf/checkstyle/checkstyle-suppressions.xml
@@ -23,6 +23,7 @@
<suppress checks="LocalFinalVariableName" files=".*[/\\]ZipfDistribution.java" />
<suppress checks="LocalFinalVariableName" \
files=".*[/\\]HypergeometricDistribution.java" /> <suppress checks="ParameterNumber" \
files=".*[/\\]TTest.java" /> + <suppress checks="ParameterNumber" \
files=".*[/\\]DoubleStatistics.java" /> <!-- Be more lenient on tests. -->
<suppress checks="Javadoc" files=".*[/\\]test[/\\].*" />
<suppress checks="MultipleStringLiterals" files=".*[/\\]test[/\\].*" />
diff --git a/src/conf/pmd/pmd-ruleset.xml b/src/conf/pmd/pmd-ruleset.xml
index 8b40d99..3035bbe 100644
--- a/src/conf/pmd/pmd-ruleset.xml
+++ b/src/conf/pmd/pmd-ruleset.xml
@@ -159,7 +159,8 @@
value="./ancestor-or-self::ClassOrInterfaceDeclaration[@SimpleName='NaturalRanking'
or @SimpleName='KolmogorovSmirnovTest' or @SimpleName='DD' or \
@SimpleName='Arguments'
or @SimpleName='MannWhitneyUTest' or @SimpleName='WilcoxonSignedRankTest'
- or @SimpleName='HypergeometricDistribution' or \
@SimpleName='UnconditionedExactTest']"/> + or \
@SimpleName='HypergeometricDistribution' or @SimpleName='UnconditionedExactTest' + \
or @SimpleName='DoubleStatistics']"/> </properties>
</rule>
<rule ref="category/java/design.xml/LogicInversion">
@@ -182,7 +183,7 @@
<!-- Add 1 as a magic number. -->
<property name="ignoreMagicNumbers" value="-1,0,1" />
<property name="violationSuppressXPath"
- value="./ancestor-or-self::MethodDeclaration[@Name='ldexp']"/>
+ value="./ancestor-or-self::MethodDeclaration[@Name='ldexp' or \
@Name='createMoment']"/> </properties>
</rule>
<rule ref="category/java/errorprone.xml/AvoidFieldNameMatchingMethodName">
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic