Multisets#countHighest vs PriorityQueue

Vladimir Sitnikov sitnikov.vladimir at gmail.com
Wed Apr 22 22:09:44 UTC 2015


Hi,

I would like to update Multisets#countHighest with a true bounded
priority queue.
This patch goes on top of "multiset update" (it uses .entrySet).

Please review the attached patch.

-- 
Vladimir Sitnikov
-------------- next part --------------
# HG changeset patch
# User Vladimir Sitnikov <sitnikov.vladimir at gmail.com>
# Date 1429740126 -10800
#      Thu Apr 23 01:02:06 2015 +0300
# Node ID e994ba5569fd62288eedaf93fece17777e586c61
# Parent  1b3c409d1331bb00a7439b12d1acdeb2ae122bef
Add BoundedPriorityQueue and use it in Multisets.countHighest

diff -r 1b3c409d1331 -r e994ba5569fd jmh-core/src/main/java/org/openjdk/jmh/util/BoundedPriorityQueue.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core/src/main/java/org/openjdk/jmh/util/BoundedPriorityQueue.java	Thu Apr 23 01:02:06 2015 +0300
@@ -0,0 +1,96 @@
+package org.openjdk.jmh.util;
+
+import java.io.Serializable;
+import java.util.*;
+
+/**
+ * Bounded variant of {@link PriorityQueue}.
+ * Note: elements are returned in reverse order.
+ * For instance, if "top N smallest elements" are required, use {@code new BoundedPriorityQueue(N)},
+ * and the elements would be returned in largest to smallest order.
+ *
+ * @param <E> type of the element
+ */
+public class BoundedPriorityQueue<E> extends AbstractQueue<E> implements Serializable {
+    private static final long serialVersionUID = 7159618773497127413L;
+
+    private final int maxSize;
+    private final Comparator<? super E> comparator;
+    private final Queue<E> queue;
+
+    /**
+     * Creates a bounded priority queue with the specified maximum size and default ordering.
+     * At most {@code maxSize} smallest elements would be kept in the queue.
+     *
+     * @param maxSize maximum size of the queue
+     */
+    public BoundedPriorityQueue(int maxSize) {
+        this(maxSize, null);
+    }
+
+    /**
+     * Creates a bounded priority queue with the specified maximum size.
+     * At most {@code maxSize} smallest elements would be kept in the queue.
+     *
+     * @param maxSize    maximum size of the queue
+     * @param comparator comparator that orders the elements
+     */
+    public BoundedPriorityQueue(int maxSize, Comparator<? super E> comparator) {
+        this.maxSize = maxSize;
+        this.comparator = reverse(comparator);
+        this.queue = new PriorityQueue<E>(this.comparator);
+    }
+
+    /**
+     * Internal queue should be in fact in reverse order.
+     * By default, the queue aims for "top N smallest elements".
+     * So peek() should return the biggest element, so it can be removed when adding smaller element
+     *
+     * @param comparator comparator that designates ordering of the entries or {@code null} for default ordering
+     * @param <E>        type of the element
+     * @return reverse comparator
+     */
+    private static <E> Comparator<? super E> reverse(Comparator<? super E> comparator) {
+        if (comparator == null) {
+            return Collections.reverseOrder();
+        }
+        return Collections.reverseOrder(comparator);
+    }
+
+    @Override
+    public boolean add(E e) {
+        return offer(e);
+    }
+
+    @Override
+    public boolean offer(E e) {
+        if (size() >= maxSize) {
+            E head = peek();
+            if (comparator.compare(e, head) < 1) {
+                return false;
+            }
+            poll();
+        }
+        return queue.offer(e);
+    }
+
+    @Override
+    public E poll() {
+        return queue.poll();
+    }
+
+    @Override
+    public E peek() {
+        return queue.peek();
+    }
+
+    @Override
+    public Iterator<E> iterator() {
+        return queue.iterator();
+    }
+
+    @Override
+    public int size() {
+        return queue.size();
+    }
+}
diff -r 1b3c409d1331 -r e994ba5569fd jmh-core/src/main/java/org/openjdk/jmh/util/Multisets.java
--- a/jmh-core/src/main/java/org/openjdk/jmh/util/Multisets.java	Wed Apr 22 20:17:39 2015 +0300
+++ b/jmh-core/src/main/java/org/openjdk/jmh/util/Multisets.java	Thu Apr 23 01:02:06 2015 +0300
@@ -24,32 +24,28 @@
  */
 package org.openjdk.jmh.util;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
-import java.util.PriorityQueue;
+import java.util.*;
 
 public class Multisets {
 
-    public static <T> Collection<T> countHighest(Multiset<T> set, int top) {
-        // crude and inefficient
-        PriorityQueue<Pair<T, Long>> q = new PriorityQueue<Pair<T, Long>>(10, new Comparator<Pair<T, Long>>() {
+    public static <T> List<T> countHighest(Multiset<T> set, int top) {
+        Queue<Map.Entry<T, Long>> q = new BoundedPriorityQueue<Map.Entry<T, Long>>(top, new Comparator<Map.Entry<T, Long>>() {
             @Override
-            public int compare(Pair<T, Long> o1, Pair<T, Long> o2) {
-                return o2.k2.compareTo(o1.k2);
+            public int compare(Map.Entry<T, Long> o1, Map.Entry<T, Long> o2) {
+                return o2.getValue().compareTo(o1.getValue());
             }
         });
 
-        for (T key : set.keys()) {
-            q.add(new Pair<T, Long>(key, set.count(key)));
+        q.addAll(set.entrySet());
+
+        List<T> result = new ArrayList<T>(q.size());
+
+        for (Map.Entry<T, Long> pair : q) {
+            result.add(pair.getKey());
         }
 
-        List<T> result = new ArrayList<T>();
-        for (int t = 0; (t < top && !q.isEmpty()); t++) {
-            result.add(q.poll().k1);
-        }
+        // BoundedPriorityQueue returns "smallest to largest", so we reverse the result
+        Collections.reverse(result);
 
         return result;
     }
@@ -67,14 +63,4 @@
         return sorted;
     }
 
-    private static class Pair<K1, K2> {
-        public final K1 k1;
-        public final K2 k2;
-
-        private Pair(K1 k1, K2 k2) {
-            this.k1 = k1;
-            this.k2 = k2;
-        }
-    }
-
 }
diff -r 1b3c409d1331 -r e994ba5569fd jmh-core/src/test/java/org/openjdk/jmh/util/BoundedPriorityQueueTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core/src/test/java/org/openjdk/jmh/util/BoundedPriorityQueueTest.java	Thu Apr 23 01:02:06 2015 +0300
@@ -0,0 +1,44 @@
+package org.openjdk.jmh.util;
+
+import junit.framework.Assert;
+import org.junit.Test;
+
+import java.util.Collections;
+import java.util.Queue;
+
+public class BoundedPriorityQueueTest {
+    @Test
+    public void top3Smallest() {
+        Queue<Integer> queue = new BoundedPriorityQueue<Integer>(3);
+        queue.add(50);
+        queue.add(40);
+        queue.add(10);
+        queue.add(20);
+        queue.add(30);
+        queue.add(30);
+        queue.add(80);
+
+        Assert.assertEquals(3, queue.size());
+        Assert.assertEquals((Integer) 30, queue.poll());
+        Assert.assertEquals((Integer) 20, queue.poll());
+        Assert.assertEquals((Integer) 10, queue.poll());
+    }
+
+    @Test
+    public void top3Largest() {
+        Queue<Integer> queue = new BoundedPriorityQueue<Integer>(3, Collections.reverseOrder());
+        queue.add(50);
+        queue.add(40);
+        queue.add(10);
+        queue.add(20);
+        queue.add(30);
+        queue.add(30);
+        queue.add(80);
+
+        Assert.assertEquals(3, queue.size());
+        Assert.assertEquals((Integer) 40, queue.poll());
+        Assert.assertEquals((Integer) 50, queue.poll());
+        Assert.assertEquals((Integer) 80, queue.poll());
+    }
+
+}
diff -r 1b3c409d1331 -r e994ba5569fd jmh-core/src/test/java/org/openjdk/jmh/util/MultisetsTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core/src/test/java/org/openjdk/jmh/util/MultisetsTest.java	Thu Apr 23 01:02:06 2015 +0300
@@ -0,0 +1,41 @@
+package org.openjdk.jmh.util;
+
+import junit.framework.Assert;
+import org.junit.Test;
+
+import java.util.List;
+
+public class MultisetsTest {
+    @Test
+    public void testCountHighest() {
+        Multiset<Integer> set = new HashMultiset<Integer>();
+        set.add(10);
+        set.add(10);
+        set.add(10);
+        set.add(20);
+        set.add(20);
+        set.add(70);
+        List<Integer> topInts = Multisets.countHighest(set, 2);
+
+        Assert.assertEquals(2, topInts.size());
+        Assert.assertEquals((Integer) 10, topInts.get(0));
+        Assert.assertEquals((Integer) 20, topInts.get(1));
+    }
+
+    @Test
+    public void testSortedDesc() {
+        Multiset<Integer> set = new HashMultiset<Integer>();
+        set.add(10);
+        set.add(10);
+        set.add(10);
+        set.add(20);
+        set.add(20);
+        set.add(70);
+        List<Integer> topInts = Multisets.sortedDesc(set);
+
+        Assert.assertEquals(3, topInts.size());
+        Assert.assertEquals((Integer) 10, topInts.get(0));
+        Assert.assertEquals((Integer) 20, topInts.get(1));
+        Assert.assertEquals((Integer) 70, topInts.get(2));
+    }
+}


More information about the jmh-dev mailing list