DTrace asm profiler for Mac OS X

Vsevolod Tolstopyatov qwwdfsad at gmail.com
Sat Jan 20 19:52:41 UTC 2018


>Vsevolod, can you please post the recent version of the patch inline here?

Here it is. Note that I've changed one line: "String[] splits =
line.split(" ");" -> "String[] splits = line.split(" ", 5);" to properly
handle native symbols (especially ones from libjvm.dylib). Retested again
on JMH samples + VM-heavy benchmarks (String#intern, exceptions)


diff -r 1ddf31f810a3
jmh-core/src/main/java/org/openjdk/jmh/profile/AbstractPerfAsmProfiler.java
---
a/jmh-core/src/main/java/org/openjdk/jmh/profile/AbstractPerfAsmProfiler.java
Fri
Sep 22 18:11:47 2017 +0200
+++
b/jmh-core/src/main/java/org/openjdk/jmh/profile/AbstractPerfAsmProfiler.java
Sat
Jan 20 22:38:48 2018 +0300
@@ -270,7 +270,7 @@

     @Override
     public Collection<? extends Result> afterTrial(BenchmarkResult br,
long pid, File stdOut, File stdErr) {
-        PerfResult result = processAssembly(br, stdOut, stdErr);
+        PerfResult result = processAssembly(br);

         // we know these are not needed anymore, proactively delete
         hsLog.delete();
@@ -311,7 +311,7 @@
      */
     protected abstract String perfBinaryExtension();

-    private PerfResult processAssembly(BenchmarkResult br, File stdOut,
File stdErr) {
+    private PerfResult processAssembly(BenchmarkResult br) {
         /**
          * 1. Parse binary events.
          */
@@ -647,11 +647,11 @@
         }
     }

-    void printDottedLine(PrintWriter pw) {
+    private void printDottedLine(PrintWriter pw) {
         printDottedLine(pw, null);
     }

-    void printDottedLine(PrintWriter pw, String header) {
+    private void printDottedLine(PrintWriter pw, String header) {
         final int HEADER_WIDTH = 100;

         pw.print("....");
@@ -668,7 +668,7 @@
         pw.println();
     }

-    List<Region> makeRegions(Assembly asms, PerfEvents events) {
+    private List<Region> makeRegions(Assembly asms, PerfEvents events) {
         List<Region> regions = new ArrayList<>();

         SortedSet<Long> allAddrs = events.getAllAddresses();
@@ -728,7 +728,7 @@
         return intervals;
     }

-    Collection<Collection<String>> splitAssembly(File stdOut) {
+    private Collection<Collection<String>> splitAssembly(File stdOut) {
         try (FileReader in = new FileReader(stdOut);
              BufferedReader br = new BufferedReader(in)) {
             Multimap<Long, String> writerToLines = new HashMultimap<>();
@@ -764,7 +764,7 @@
         }
     }

-    Assembly readAssembly(File stdOut) {
+    private Assembly readAssembly(File stdOut) {
         List<ASMLine> lines = new ArrayList<>();
         SortedMap<Long, Integer> addressMap = new TreeMap<>();

@@ -919,11 +919,11 @@
     static class PerfResultAggregator implements Aggregator<PerfResult> {
         @Override
         public PerfResult aggregate(Collection<PerfResult> results) {
-            String output = "";
+            StringBuilder output = new StringBuilder();
             for (PerfResult r : results) {
-                output += r.output;
+                output.append(r.output);
             }
-            return new PerfResult(output);
+            return new PerfResult(output.toString());
         }
     }

diff -r 1ddf31f810a3
jmh-core/src/main/java/org/openjdk/jmh/profile/DTraceAsmProfiler.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core/src/main/java/org/openjdk/jmh/profile/DTraceAsmProfiler.java Sat
Jan 20 22:38:48 2018 +0300
@@ -0,0 +1,194 @@
+package org.openjdk.jmh.profile;
+
+import joptsimple.OptionException;
+import joptsimple.OptionParser;
+import joptsimple.OptionSpec;
+import org.openjdk.jmh.infra.BenchmarkParams;
+import org.openjdk.jmh.results.BenchmarkResult;
+import org.openjdk.jmh.results.Result;
+import org.openjdk.jmh.util.*;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Mac OS X perfasm profiler based on DTrace "profile-n" provider which
samples program counter by timer interrupt.
+ * Due to DTrace limitations on Mac OS X target JVM cannot be run directly
under DTrace control, so DTrace is run separately,
+ * all processes are sampled and irrelevant samples are filtered out in
{@link #readEvents(double, double)} stage.
+ * Super user privileges are required in order to run DTrace.
+ * <p>
+ * If you see a lot of "[unknown]" regions in profile then you are
probably hitting kernel code, kernel sampling is not yet supported.
+ *
+ * @author Tolstopyatov Vsevolod
+ * @since 18/10/2017
+ */
+public class DTraceAsmProfiler extends AbstractPerfAsmProfiler {
+
+    private final long sampleFrequency;
+    private volatile String pid;
+    private volatile Process dtraceProcess;
+    private OptionSpec<Long> optFrequency;
+
+    public DTraceAsmProfiler(String initLine) throws ProfilerException {
+        super(initLine, "sampled_pc");
+
+        // Check DTrace availability
+        Collection<String> messages = Utils.tryWith("sudo", "dtrace",
"-V");
+        if (!messages.isEmpty()) {
+            throw new ProfilerException(messages.toString());
+        }
+
+        try {
+            sampleFrequency = set.valueOf(optFrequency);
+        } catch (OptionException e) {
+            throw new ProfilerException(e.getMessage());
+        }
+    }
+
+    @Override
+    public void beforeTrial(BenchmarkParams params) {
+        super.beforeTrial(params);
+    }
+
+    @Override
+    public Collection<? extends Result> afterTrial(BenchmarkResult br,
long pid, File stdOut, File stdErr) {
+        if (pid == 0) {
+            throw new IllegalStateException("DTrace needs the forked VM
PID, but it is not initialized");
+        }
+
+        Collection<String> messages = Utils.destroy(dtraceProcess);
+        if (!messages.isEmpty()) {
+            throw new IllegalStateException(messages.toString());
+        }
+
+        this.pid = String.valueOf(pid);
+        return super.afterTrial(br, pid, stdOut, stdErr);
+    }
+
+    @Override
+    public Collection<String> addJVMInvokeOptions(BenchmarkParams params) {
+        dtraceProcess = Utils.runAsync("sudo", "dtrace", "-n", "profile-"
+ sampleFrequency +
+                        " /arg1/ { printf(\"%d 0x%lx %d\", pid, arg1,
timestamp); ufunc(arg1)}", "-o",
+                perfBinData.getAbsolutePath());
+        return Collections.emptyList();
+    }
+
+    @Override
+    public String getDescription() {
+        return "DTrace profile provider + PrintAssembly Profiler";
+    }
+
+    @Override
+    protected void addMyOptions(OptionParser parser) {
+        optFrequency = parser.accepts("frequency",
+                "Sampling frequency. This is synonymous to profile-#")
+
.withRequiredArg().ofType(Long.class).describedAs("freq").defaultsTo(1001L);
+    }
+
+    @Override
+    protected void parseEvents() {
+        // Do nothing because DTrace writes text output anyway
+    }
+
+    @Override
+    protected PerfEvents readEvents(double skipMs, double lenMs) {
+        long start = (long) skipMs;
+        long end = (long) (skipMs + lenMs);
+
+        try (FileReader fr = new FileReader(perfBinData.file());
+             BufferedReader reader = new BufferedReader(fr)) {
+
+            Deduplicator<MethodDesc> dedup = new Deduplicator<>();
+            Multimap<MethodDesc, Long> methods = new HashMultimap<>();
+            Multiset<Long> events = new TreeMultiset<>();
+
+            long dtraceTimestampBase = 0L;
+            String line;
+            while ((line = reader.readLine()) != null) {
+
+                // Filter out DTrace misc
+                if (!line.contains(":profile")) {
+                    continue;
+                }
+
+                line = line.trim();
+                line = line.substring(line.indexOf(":profile"));
+                String[] splits = line.split(" ", 5);
+                String sampledPid = splits[1];
+
+                if (!sampledPid.equals(pid)) {
+                    continue;
+                }
+
+                // Sometimes DTrace ufunc fails and gives no information
about symbols
+                if (splits.length < 4) {
+                    continue;
+                }
+
+                long timestamp = Long.valueOf(splits[3]);
+                if (dtraceTimestampBase == 0) {
+                    // Use first event timestamp as base for time
comparison
+                    dtraceTimestampBase = timestamp;
+                    continue;
+                }
+
+                long elapsed = timestamp - dtraceTimestampBase;
+                long elapsedMs = TimeUnit.NANOSECONDS.toMillis(elapsed);
+
+                if (elapsedMs < start || elapsedMs > end) {
+                    continue;
+                }
+
+                long address = Long.decode(splits[2]);
+                events.add(address);
+
+                String methodLine = splits[4];
+                // JIT-compiled code has address instead of symbol
information
+                if (methodLine.startsWith("0x")) {
+                    continue;
+                }
+
+                String symbol = "[unknown]";
+                String[] methodSplit = methodLine.split("`");
+                String library = methodSplit[0];
+                if ("".equals(library)) {
+                    library = "[unknown]";
+                }
+
+                if (methodSplit.length == 2) {
+                    symbol = methodSplit[1];
+                }
+
+                methods.put(dedup.dedup(MethodDesc.nativeMethod(symbol,
library)), address);
+            }
+
+            IntervalMap<MethodDesc> methodMap = new IntervalMap<>();
+            for (MethodDesc md : methods.keys()) {
+                Collection<Long> longs = methods.get(md);
+                methodMap.add(md, Utils.min(longs), Utils.max(longs));
+            }
+
+            Map<String, Multiset<Long>> allEvents = new TreeMap<>();
+            assert this.events.size() == 1;
+            allEvents.put(this.events.get(0), events);
+            return new PerfEvents(this.events, allEvents, methodMap);
+
+        } catch (IOException e) {
+            return new PerfEvents(events);
+        }
+
+    }
+
+    @Override
+    protected String perfBinaryExtension() {
+        // DTrace produces human-readable txt
+        return ".txt";
+    }
+}
diff -r 1ddf31f810a3
jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java
--- a/jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java Fri
Sep 22 18:11:47 2017 +0200
+++ b/jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java Sat
Jan 20 22:38:48 2018 +0300
@@ -27,7 +27,6 @@
 import org.openjdk.jmh.runner.options.ProfilerConfig;

 import java.io.PrintStream;
-import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
 import java.util.*;

@@ -178,6 +177,7 @@
         BUILT_IN.put("perfnorm", LinuxPerfNormProfiler.class);
         BUILT_IN.put("perfasm",  LinuxPerfAsmProfiler.class);
         BUILT_IN.put("xperfasm", WinPerfAsmProfiler.class);
+        BUILT_IN.put("dtraceasm", DTraceAsmProfiler.class);
         BUILT_IN.put("pauses",   PausesProfiler.class);
         BUILT_IN.put("safepoints", SafepointsProfiler.class);
     }
diff -r 1ddf31f810a3 jmh-core/src/main/java/org/openjdk/jmh/util/Utils.java
--- a/jmh-core/src/main/java/org/openjdk/jmh/util/Utils.java Fri Sep 22
18:11:47 2017 +0200
+++ b/jmh-core/src/main/java/org/openjdk/jmh/util/Utils.java Sat Jan 20
22:38:48 2018 +0300
@@ -446,6 +446,31 @@
         return messages;
     }

+    public static Process runAsync(String... cmd) {
+        try {
+            return new ProcessBuilder(cmd).start();
+        } catch (IOException ex) {
+            throw new IllegalStateException(ex);
+        }
+    }
+
+    public static Collection<String> destroy(Process process) {
+        Collection<String> messages = new ArrayList<>();
+        try {
+            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+            process.destroy();
+            int exitCode = process.waitFor();
+            if (exitCode == 0) {
+                return Collections.emptyList();
+            }
+
+            messages.add(baos.toString());
+            return messages;
+        } catch (InterruptedException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
     public static Collection<String> runWith(List<String> cmd) {
         Collection<String> messages = new ArrayList<>();
         try {
diff -r 1ddf31f810a3
jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_35_Profilers.java
---
a/jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_35_Profilers.java
Fri
Sep 22 18:11:47 2017 +0200
+++
b/jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_35_Profilers.java
Sat
Jan 20 22:38:48 2018 +0300
@@ -33,6 +33,7 @@
 import org.openjdk.jmh.annotations.*;
 import org.openjdk.jmh.infra.Blackhole;
 import org.openjdk.jmh.profile.ClassloaderProfiler;
+import org.openjdk.jmh.profile.DTraceAsmProfiler;
 import org.openjdk.jmh.profile.LinuxPerfProfiler;
 import org.openjdk.jmh.profile.StackProfiler;
 import org.openjdk.jmh.runner.Runner;
@@ -352,7 +353,7 @@
          *    $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
perfnorm -f 3 (Linux)
          *    $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
perfasm  -f 1 (Linux)
          *    $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
xperfasm -f 1 (Windows)
-         *
+         *    $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
dtraceasm -f 1 (Mac OS X)
          * b) Via the Java API:
          *    (see the JMH homepage for possible caveats when running from
IDE:
          *      http://openjdk.java.net/projects/code-tools/jmh/)
@@ -365,6 +366,7 @@
 //                    .addProfiler(LinuxPerfNormProfiler.class)
 //                    .addProfiler(LinuxPerfAsmProfiler.class)
 //                    .addProfiler(WinPerfAsmProfiler.class)
+//                    .addProfiler(DTraceAsmProfiler.class)
                     .build();

             new Runner(opt).run();


--
Best regards,
Tolstopyatov Vsevolod

On Wed, Jan 17, 2018 at 3:58 PM, Henri Tremblay <henri.tremblay at gmail.com>
wrote:

> Well found! It seems much better.
>
> Then, it still not found lines. I thought JMH was adding the correct
> PrintAssembly flags when forking. But the issue seems to be similar with
> the warning I had for perfasm.
>
> ERROR: No address lines detected in assembly capture, make sure your JDK
> is PrintAssembly-enabled:
>     https://wiki.openjdk.java.net/display/HotSpot/PrintAssembly
>
> Perf output processed (skipped 2.531 seconds):
>  Column 1: sampled_pc (11033 events)
>
> Hottest code regions (>10.00% "sampled_pc" events):
>
> ....[Hottest Region 1]............................
> ..................................................
> [unknown], [unknown] (231 bytes)
>
>  <no assembly is recorded, native region>
> ............................................................
> ........................................
>  22.54%  <total for region 1>
>
> ....[Hottest Region 2]............................
> ..................................................
> [unknown], [unknown] (134 bytes)
>
>  <no assembly is recorded, native region>
> ............................................................
> ........................................
>  11.31%  <total for region 2>
>
> ....[Hottest Region 3]............................
> ..................................................
> [unknown], [unknown] (41 bytes)
>
>  <no assembly is recorded, native region>
> ............................................................
> ........................................
>  10.39%  <total for region 3>
>
> ....[Hottest Region 4]............................
> ..................................................
> [unknown], [unknown] (417 bytes)
>
>  <no assembly is recorded, native region>
> ............................................................
> ........................................
>  10.27%  <total for region 4>
>
> ....[Hottest Regions]....................................................
> ...........................
>  22.54%  [unknown]  [unknown] (231 bytes)
>  11.31%  [unknown]  [unknown] (134 bytes)
>  10.39%  [unknown]  [unknown] (41 bytes)
>  10.27%  [unknown]  [unknown] (417 bytes)
>   8.45%  [unknown]  [unknown] (396 bytes)
>   5.71%  [unknown]  [unknown] (43 bytes)
>   5.66%  [unknown]  [unknown] (55 bytes)
>   5.45%  [unknown]  [unknown] (84 bytes)
>   4.17%  [unknown]  [unknown] (209 bytes)
>   2.47%  [unknown]  [unknown] (71 bytes)
>   2.12%  [unknown]  [unknown] (28 bytes)
>   2.04%  [unknown]  [unknown] (28 bytes)
>   1.63%  [unknown]  [unknown] (170 bytes)
>   0.95%  [unknown]  [unknown] (86 bytes)
>   0.89%  [unknown]  [unknown] (5 bytes)
>   0.83%  [unknown]  [unknown] (127 bytes)
>   0.81%  [unknown]  [unknown] (0 bytes)
>   0.55%  [unknown]  [unknown] (46 bytes)
>   0.44%  [unknown]  [unknown] (71 bytes)
>   0.41%  [unknown]  [unknown] (257 bytes)
>   2.91%  <...other 73 warm regions...>
> ............................................................
> ........................................
>  99.99%  <totals>
>
> ....[Hottest Methods (after inlining)]....................
> ..........................................
>  99.99%  [unknown]  [unknown]
> ............................................................
> ........................................
>  99.99%  <totals>
>
> ....[Distribution by Source].......................
> .................................................
>  99.99%  [unknown]
> ............................................................
> ........................................
>  99.99%  <totals>
>
>
>
> # Run complete. Total time: 00:00:18
>
> Benchmark                                          (implementation)
> (readRatio)  (tableSize)   Mode  Cnt         Score         Error  Units
> ConcurrentMapThroughput.randomGetPutRemove       NonBlockingHashMap
>      50       100000  thrpt    6  22470630.498 ± 3730613.043  ops/s
> ConcurrentMapThroughput.randomGetPutRemove:·asm  NonBlockingHashMap
>      50       100000  thrpt                NaN                  ---
> ConcurrentMapThroughput.randomGetPutRemove        ConcurrentHashMap
>      50       100000  thrpt    6  23411000.106 ± 1960758.197  ops/s
> ConcurrentMapThroughput.randomGetPutRemove:·asm   ConcurrentHashMap
>      50       100000  thrpt                NaN                  ---
>
> On 17 January 2018 at 07:20, Aleksey Shipilev <shade at redhat.com> wrote:
>
>> On 01/14/2018 04:24 PM, Vsevolod Tolstopyatov wrote:
>> > I have limited access to different versions of Mac OS X, but it seems
>> that in some minor updates
>> > DTrace works with SIP enabled.
>> > So as solution I'd suggest to check SIP status on profiler start (via
>> "csrutil status") and print
>> > warning if it's enabled or just clarify it in javadoc. It's up to
>> Alexey to decide what approach is
>> > preferable in JMH
>>
>> So, perf* profiler print warning messages when then number of samples is
>> suspiciously low. I think
>> dtraceasm profiler should do the same, and clearly say what the user is
>> supposed to do:
>>
>>  WARNING: The perf event count is suspiciously low (" + sum + "). The
>> performance data might be
>>  inaccurate or misleading. Try to do the profiling again, or tune up the
>> sampling frequency.
>>  With some profilers on Mac OS X, System Integrity Protection (SIP) may
>> prevent profiling.
>>  In such case, temporarily disabling SIP with 'csrutil disable' might
>> help.
>>
>> I'll add this to the patch myself.
>>
>> I can push this without my own testing, hoping that external people
>> validated this. OpenJDK rules
>> require the patch to be hosted on OpenJDK infra to get accepted, so,
>> Vsevolod, can you please post
>> the recent version of the patch inline here?
>>
>> Thanks
>> -Aleksey
>>
>>
>>
>


More information about the jmh-dev mailing list