New allocation profiler

Vladimir Sitnikov sitnikov.vladimir at gmail.com
Sun Apr 12 13:50:05 UTC 2015


Hi,

I would like to add allocation profiler into jmh.
Hotspot exposes it via ThreadMXBean.getThreadAllocatedBytes.

Basic features:
* HotSpot specific
* Zero overhead
* High precision even with low iteration number
* High accuracy in avgt/sample/throughput modes (results are close to
the expected ones)
* Can miss account for threads created during benchmark.

Open questions:
* next steps
* fix "single shot mode" if possible (see [1])

If my memory serves me well, allocation profiling is always enabled by
default and the API is available since 1.6u26.

The idea is to sum allocated bytes over the threads excluding current one.
This intentionally includes allocation made by background threads, and
I believe that is expected.

The attached implementation uses just two snapshots (before and
after). I'm not sure if spinning a background thread doing snapshots
to capture newly created/destroyed threads is worth doing.

I am not sure if the approach is accurate in terms of "only benchmark
load is measured, not setup/teardown steps".

The reported results for "average time" mode are close to the expected
ones (e.g. new Object() is reported to allocate 16 bytes in 64bit
JVM).

It looks like "single shot" mode has some issues and it reports to
allocate 480 bytes. Does it mean profilers catch a bit of
setup/teardown in SS mode? Can this be improved?

[1]: https://gist.github.com/vlsi/c4c0c6c6595f318e639a

-- 
Regards,
Vladimir Sitnikov
-------------- next part --------------
# HG changeset patch
# User Vladimir Sitnikov <sitnikov.vladimir at gmail.com>
# Date 1428845687 -10800
#      Sun Apr 12 16:34:47 2015 +0300
# Node ID f37e28c894633ba1ac373e654da2e6411e38eaaf
# Parent  1898f77c754c84e33da6b74b4e913f009cd27841
Add profiler for memory allocation. The implementation is based on ThreadMXBean.getThreadAllocatedBytes.
The implementation can miss allocations if new thread is created and destroyed in between profiler snapshots.

diff -r 1898f77c754c -r f37e28c89463 jmh-core-it/src/test/java/org/openjdk/jmh/it/profilers/HotspotAllocationProfilerTest.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core-it/src/test/java/org/openjdk/jmh/it/profilers/HotspotAllocationProfilerTest.java	Sun Apr 12 16:34:47 2015 +0300
@@ -0,0 +1,61 @@
+package org.openjdk.jmh.it.profilers;
+
+import org.junit.Test;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.it.Fixtures;
+import org.openjdk.jmh.profile.HotspotAllocationProfiler;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+import java.util.ArrayList;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Tests allocation profiler.
+ */
+ at OutputTimeUnit(TimeUnit.NANOSECONDS)
+public class HotspotAllocationProfilerTest {
+    @Benchmark
+    @Warmup(iterations = 2)
+    @Measurement(iterations = 1, time = 1, timeUnit = TimeUnit.MILLISECONDS)
+    @BenchmarkMode(Mode.AverageTime)
+    @Fork(1)
+    public Object allocateObject() {
+        return new Object();
+    }
+
+    @Benchmark
+    @Warmup(iterations = 2)
+    @Measurement(iterations = 5)
+    @BenchmarkMode(Mode.SingleShotTime)
+    @Fork(1)
+    public Object allocateObjectSingleShot() {
+        return new Object();
+    }
+
+    @Benchmark
+    @Warmup(iterations = 2)
+    @Measurement(iterations = 1)
+    @BenchmarkMode(Mode.SampleTime)
+    @Fork(1)
+    public Object allocateObjectSampleTime() {
+        return new Object();
+    }
+
+    @Test
+    public void testAllocationProfiler() throws RunnerException {
+        HotspotAllocationProfiler profiler = new HotspotAllocationProfiler();
+        ArrayList<String> errors = new ArrayList<String>();
+        if (!profiler.checkSupport(errors)) {
+            System.out.println("Allocation profiler is not available: " + errors.toString());
+            return;
+        }
+        Options opts = new OptionsBuilder()
+                .include(Fixtures.getTestMask(this.getClass()))
+                .addProfiler(HotspotAllocationProfiler.class)
+                .build();
+        new Runner(opts).run();
+    }
+}
diff -r 1898f77c754c -r f37e28c89463 jmh-core/src/main/java/org/openjdk/jmh/profile/HotspotAllocationProfiler.java
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core/src/main/java/org/openjdk/jmh/profile/HotspotAllocationProfiler.java	Sun Apr 12 16:34:47 2015 +0300
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.jmh.profile;
+
+import org.openjdk.jmh.infra.BenchmarkParams;
+import org.openjdk.jmh.infra.IterationParams;
+import org.openjdk.jmh.results.AggregationPolicy;
+import org.openjdk.jmh.results.IterationResult;
+import org.openjdk.jmh.results.Result;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.ThreadMXBean;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Measures thread allocated memory in bytes via {@code ThreadMXBean.getThreadAllocatedBytes}.
+ * Profiler measures allocation of all the threads but current one.
+ */
+public class HotspotAllocationProfiler implements InternalProfiler {
+    private static final ThreadMXBean threadMXBean;
+    private static final Method getThreadAllocatedBytes;
+    private static final Collection<String> FAIL_MSGS;
+
+    private static final long[] EMPTY_IDS = new long[0];
+
+    private long[] beforeIds, beforeAllocated;
+    private long[] tmpIds, tmpAllocated; // To avoid allocation while returning two values from getTotalAllocatedMemory
+    private long beforeTime;
+
+    static {
+        threadMXBean = ManagementFactory.getThreadMXBean();
+        Collection<String> failMessages;
+        Method getBytes;
+        try {
+            getBytes = threadMXBean.getClass()
+                    .getMethod("getThreadAllocatedBytes", long[].class);
+            getBytes.setAccessible(true);
+            failMessages = Collections.emptyList();
+        } catch (NoSuchMethodException e) {
+            getBytes = null;
+            failMessages = Collections.singletonList(
+                    "Method getThreadAllocatedBytes is not found in ThreadMXBean: " + e
+                            .getMessage() + ", are you running HotSpot VM 1.6u26+?");
+        }
+        getThreadAllocatedBytes = getBytes;
+        FAIL_MSGS = failMessages;
+    }
+
+    @Override
+    public boolean checkSupport(List<String> msgs) {
+        msgs.addAll(FAIL_MSGS);
+        return FAIL_MSGS.isEmpty();
+    }
+
+    @Override
+    public String label() {
+        return "hs_alloc";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Memory allocation profiler";
+    }
+
+    @Override
+    public void beforeIteration(BenchmarkParams benchmarkParams,
+                                IterationParams iterationParams) {
+        snapshotAllocatedMemory();
+        beforeAllocated = tmpAllocated;
+        beforeIds = tmpIds;
+        beforeTime = System.nanoTime();
+    }
+
+    private void snapshotAllocatedMemory() {
+        if (getThreadAllocatedBytes == null) {
+            tmpIds = tmpAllocated = EMPTY_IDS;
+            return;
+        }
+        try {
+            tmpIds = threadMXBean.getAllThreadIds();
+            tmpAllocated = (long[]) getThreadAllocatedBytes.invoke(threadMXBean, (Object) tmpIds);
+            return;
+        } catch (IllegalAccessException e) {
+            /* intentionally left blank */
+        } catch (InvocationTargetException e) {
+            /* intentionally left blank */
+        }
+        // In exceptional cases, assume information is not available
+        tmpIds = tmpAllocated = EMPTY_IDS;
+    }
+
+    @Override
+    public Collection<? extends Result> afterIteration(
+            BenchmarkParams benchmarkParams, IterationParams iterationParams,
+            IterationResult result) {
+        long afterTime = System.nanoTime();
+        snapshotAllocatedMemory();
+        if (beforeAllocated.length == 0 || tmpAllocated.length == 0) {
+            // When allocation profiling fails, make sure it is distinguishable in report
+            return Collections.singleton(
+                    new ProfilerResult(Defaults.PREFIX + "allocated",
+                            -1,
+                            "error", AggregationPolicy.MAX));
+        }
+        long allocated = estimateAllocatedBytes();
+        // When no allocations measured, we just ignore zero result lines to improve readability
+        if (allocated == 0) {
+            return Collections.emptyList();
+        }
+        ArrayList<Result> results = new ArrayList<Result>(2);
+        if (afterTime - beforeTime != 0) {
+            results.add(
+                    new ProfilerResult(Defaults.PREFIX + "allocation.rate",
+                            1.0 * allocated / 1024 / 1024 * TimeUnit.SECONDS.toNanos(1) / (afterTime - beforeTime),
+                            "MB/sec", AggregationPolicy.AVG));
+        }
+        if (result.getMetadata().getAllOps() != 0) {
+            results.add(
+                    new ProfilerResult(Defaults.PREFIX + "allocated",
+                            1.0 * allocated / result.getMetadata().getAllOps(),
+                            "B/op", AggregationPolicy.AVG));
+        }
+        return results;
+    }
+
+    /**
+     * Estimates allocated bytes based on two snapshots.
+     * The problem is threads can come and go while performing the benchmark,
+     * thus we would miss allocations made in a thread that was created and died between the snapshots.
+     *
+     * @return estimated number of allocated bytes between profiler calls
+     */
+    private long estimateAllocatedBytes() {
+        HashMap<Long, Integer> prevIndex = new HashMap<Long, Integer>();
+        for (int i = 0; i < beforeIds.length; i++) {
+            long id = beforeIds[i];
+            prevIndex.put(id, i);
+        }
+        long currentThreadId = Thread.currentThread().getId();
+        long allocated = 0;
+        for (int i = 0; i < tmpIds.length; i++) {
+            long id = tmpIds[i];
+            if (id == currentThreadId) {
+                continue;
+            }
+            allocated += tmpAllocated[i];
+            Integer prev = prevIndex.get(id);
+            if (prev != null) {
+                allocated -= beforeAllocated[prev];
+            }
+        }
+        return allocated;
+    }
+}
diff -r 1898f77c754c -r f37e28c89463 jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java
--- a/jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java	Tue Apr 07 13:13:18 2015 +0300
+++ b/jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java	Sun Apr 12 16:34:47 2015 +0300
@@ -40,6 +40,7 @@
         profs.add(ClassloaderProfiler.class);
         profs.add(CompilerProfiler.class);
         profs.add(GCProfiler.class);
+        profs.add(HotspotAllocationProfiler.class);
         profs.add(HotspotClassloadingProfiler.class);
         profs.add(HotspotCompilationProfiler.class);
         profs.add(HotspotMemoryProfiler.class);


More information about the jmh-dev mailing list