Proposal to enhance Stream.collect
Tagir Valeev
amaembo at gmail.com
Wed Feb 27 07:05:54 UTC 2019
Hello!
> A less intrusive API direction might be a version of Collector whose
> supplier function took a size-estimate argument; this might even help in
> parallel since it allows for intermediate results to start with a better
> initial size guess. (And this could be implemented as a default that
> delegates to the existing supplier.) Still, not really sure this
> carries its weight.
It's interesting that such optimization is possible without API
change, using the "hidden knowledge".
While public API stays the same, the collect method implementation may
check if custom
non-public Collector implementation is supplied, and in this case may
use the sized supplier.
I think it's the best solution given the fact that very few collectors
may benefit from the exact
known size, and this benefit usually disappears when collectors are
composed (e.g. using groupingBy:
downstream toList() would not win anything if it provides a sizedSupplier).
I created a quick patch and launched a quick benchmark like this:
return new SplittableRandom(0).ints(length, 0,
100).boxed().collect(Collectors.toList());
For length = 100, 10000, 1000000
Here's results for vanilla 13-ea+8:
length = 100, avg time = 1434,469 ± 27,147 ns/op, alloc rate = 1712 B/op
length = 10000, avg time = 155032,390 ± 2657,927 ns/op, alloc rate =
169280 B/op
length = 1000000, avg time = 27099621,763 ± 1979054,500 ns/op, alloc
rate = 14586750 B/op
Patched:
length = 100, avg time = 1414,480 ± 30,040 ns/op, alloc rate = 768 B/op
length = 10000, avg time = 137338,320 ± 1316,789 ns/op, alloc rate = 40368 B/op
length = 1000000, avg time = 17654635,871 ± 210607,441 ns/op, alloc
rate = 4000382 B/op
As you can see, exact allocation really helps to reduce alloc pressure
and produces better performance for big lists.
If such kind of patch is acceptable for Stream API, I can file a
ticket and submit a webrev.
With best regards,
Tagir Valeev
The patch follows:
--- src/java.base/share/classes/java/util/stream/Collectors.java
(revision 53626:e2fc434b410a35b28ab433c29863c8a26e4e813a)
+++ src/java.base/share/classes/java/util/stream/Collectors.java
(revision 53626+:e2fc434b410a+)
@@ -50,6 +50,7 @@
import java.util.function.BinaryOperator;
import java.util.function.Consumer;
import java.util.function.Function;
+import java.util.function.IntFunction;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.function.ToDoubleFunction;
@@ -194,23 +195,34 @@
*/
static class CollectorImpl<T, A, R> implements Collector<T, A, R> {
private final Supplier<A> supplier;
+ private final IntFunction<A> sizedSupplier;
private final BiConsumer<A, T> accumulator;
private final BinaryOperator<A> combiner;
private final Function<A, R> finisher;
private final Set<Characteristics> characteristics;
CollectorImpl(Supplier<A> supplier,
+ IntFunction<A> sizedSupplier,
BiConsumer<A, T> accumulator,
BinaryOperator<A> combiner,
Function<A,R> finisher,
Set<Characteristics> characteristics) {
this.supplier = supplier;
+ this.sizedSupplier = sizedSupplier;
this.accumulator = accumulator;
this.combiner = combiner;
this.finisher = finisher;
this.characteristics = characteristics;
}
+ CollectorImpl(Supplier<A> supplier,
+ BiConsumer<A, T> accumulator,
+ BinaryOperator<A> combiner,
+ Function<A,R> finisher,
+ Set<Characteristics> characteristics) {
+ this(supplier, null, accumulator, combiner, finisher,
characteristics);
+ }
+
CollectorImpl(Supplier<A> supplier,
BiConsumer<A, T> accumulator,
BinaryOperator<A> combiner,
@@ -228,6 +240,10 @@
return supplier;
}
+ IntFunction<A> sizedSupplier() {
+ return sizedSupplier;
+ }
+
@Override
public BinaryOperator<A> combiner() {
return combiner;
@@ -275,8 +291,11 @@
*/
public static <T>
Collector<T, ?, List<T>> toList() {
- return new CollectorImpl<>((Supplier<List<T>>)
ArrayList::new, List::add,
+ return new CollectorImpl<>((Supplier<List<T>>) ArrayList::new,
+ ArrayList::new,
+ List::add,
(left, right) -> {
left.addAll(right); return left; },
+ castingIdentity(),
CH_ID);
}
Index: src/java.base/share/classes/java/util/stream/ReduceOps.java
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
--- src/java.base/share/classes/java/util/stream/ReduceOps.java
(revision 53626:e2fc434b410a35b28ab433c29863c8a26e4e813a)
+++ src/java.base/share/classes/java/util/stream/ReduceOps.java
(revision 53626+:e2fc434b410a+)
@@ -36,6 +36,7 @@
import java.util.function.BinaryOperator;
import java.util.function.DoubleBinaryOperator;
import java.util.function.IntBinaryOperator;
+import java.util.function.IntFunction;
import java.util.function.LongBinaryOperator;
import java.util.function.ObjDoubleConsumer;
import java.util.function.ObjIntConsumer;
@@ -155,13 +156,20 @@
public static <T, I> TerminalOp<T, I>
makeRef(Collector<? super T, I, ?> collector) {
Supplier<I> supplier = Objects.requireNonNull(collector).supplier();
+ @SuppressWarnings("unchecked")
+ IntFunction<I> sizedSupplier = collector instanceof
Collectors.CollectorImpl ?
+ ((Collectors.CollectorImpl<?, I, ?>)
collector).sizedSupplier() : null;
BiConsumer<I, ? super T> accumulator = collector.accumulator();
BinaryOperator<I> combiner = collector.combiner();
class ReducingSink extends Box<I>
implements AccumulatingSink<T, I, ReducingSink> {
@Override
public void begin(long size) {
- state = supplier.get();
+ if (sizedSupplier != null && size >= 0 && size <=
Integer.MAX_VALUE) {
+ state = sizedSupplier.apply((int) size);
+ } else {
+ state = supplier.get();
+ }
}
@Override
More information about the core-libs-dev
mailing list