DTrace asm profiler for Mac OS X
Vsevolod Tolstopyatov
qwwdfsad at gmail.com
Sat Jan 20 19:52:41 UTC 2018
>Vsevolod, can you please post the recent version of the patch inline here?
Here it is. Note that I've changed one line: "String[] splits =
line.split(" ");" -> "String[] splits = line.split(" ", 5);" to properly
handle native symbols (especially ones from libjvm.dylib). Retested again
on JMH samples + VM-heavy benchmarks (String#intern, exceptions)
diff -r 1ddf31f810a3
jmh-core/src/main/java/org/openjdk/jmh/profile/AbstractPerfAsmProfiler.java
---
a/jmh-core/src/main/java/org/openjdk/jmh/profile/AbstractPerfAsmProfiler.java
Fri
Sep 22 18:11:47 2017 +0200
+++
b/jmh-core/src/main/java/org/openjdk/jmh/profile/AbstractPerfAsmProfiler.java
Sat
Jan 20 22:38:48 2018 +0300
@@ -270,7 +270,7 @@
@Override
public Collection<? extends Result> afterTrial(BenchmarkResult br,
long pid, File stdOut, File stdErr) {
- PerfResult result = processAssembly(br, stdOut, stdErr);
+ PerfResult result = processAssembly(br);
// we know these are not needed anymore, proactively delete
hsLog.delete();
@@ -311,7 +311,7 @@
*/
protected abstract String perfBinaryExtension();
- private PerfResult processAssembly(BenchmarkResult br, File stdOut,
File stdErr) {
+ private PerfResult processAssembly(BenchmarkResult br) {
/**
* 1. Parse binary events.
*/
@@ -647,11 +647,11 @@
}
}
- void printDottedLine(PrintWriter pw) {
+ private void printDottedLine(PrintWriter pw) {
printDottedLine(pw, null);
}
- void printDottedLine(PrintWriter pw, String header) {
+ private void printDottedLine(PrintWriter pw, String header) {
final int HEADER_WIDTH = 100;
pw.print("....");
@@ -668,7 +668,7 @@
pw.println();
}
- List<Region> makeRegions(Assembly asms, PerfEvents events) {
+ private List<Region> makeRegions(Assembly asms, PerfEvents events) {
List<Region> regions = new ArrayList<>();
SortedSet<Long> allAddrs = events.getAllAddresses();
@@ -728,7 +728,7 @@
return intervals;
}
- Collection<Collection<String>> splitAssembly(File stdOut) {
+ private Collection<Collection<String>> splitAssembly(File stdOut) {
try (FileReader in = new FileReader(stdOut);
BufferedReader br = new BufferedReader(in)) {
Multimap<Long, String> writerToLines = new HashMultimap<>();
@@ -764,7 +764,7 @@
}
}
- Assembly readAssembly(File stdOut) {
+ private Assembly readAssembly(File stdOut) {
List<ASMLine> lines = new ArrayList<>();
SortedMap<Long, Integer> addressMap = new TreeMap<>();
@@ -919,11 +919,11 @@
static class PerfResultAggregator implements Aggregator<PerfResult> {
@Override
public PerfResult aggregate(Collection<PerfResult> results) {
- String output = "";
+ StringBuilder output = new StringBuilder();
for (PerfResult r : results) {
- output += r.output;
+ output.append(r.output);
}
- return new PerfResult(output);
+ return new PerfResult(output.toString());
}
}
diff -r 1ddf31f810a3
jmh-core/src/main/java/org/openjdk/jmh/profile/DTraceAsmProfiler.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/jmh-core/src/main/java/org/openjdk/jmh/profile/DTraceAsmProfiler.java Sat
Jan 20 22:38:48 2018 +0300
@@ -0,0 +1,194 @@
+package org.openjdk.jmh.profile;
+
+import joptsimple.OptionException;
+import joptsimple.OptionParser;
+import joptsimple.OptionSpec;
+import org.openjdk.jmh.infra.BenchmarkParams;
+import org.openjdk.jmh.results.BenchmarkResult;
+import org.openjdk.jmh.results.Result;
+import org.openjdk.jmh.util.*;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Mac OS X perfasm profiler based on DTrace "profile-n" provider which
samples program counter by timer interrupt.
+ * Due to DTrace limitations on Mac OS X target JVM cannot be run directly
under DTrace control, so DTrace is run separately,
+ * all processes are sampled and irrelevant samples are filtered out in
{@link #readEvents(double, double)} stage.
+ * Super user privileges are required in order to run DTrace.
+ * <p>
+ * If you see a lot of "[unknown]" regions in profile then you are
probably hitting kernel code, kernel sampling is not yet supported.
+ *
+ * @author Tolstopyatov Vsevolod
+ * @since 18/10/2017
+ */
+public class DTraceAsmProfiler extends AbstractPerfAsmProfiler {
+
+ private final long sampleFrequency;
+ private volatile String pid;
+ private volatile Process dtraceProcess;
+ private OptionSpec<Long> optFrequency;
+
+ public DTraceAsmProfiler(String initLine) throws ProfilerException {
+ super(initLine, "sampled_pc");
+
+ // Check DTrace availability
+ Collection<String> messages = Utils.tryWith("sudo", "dtrace",
"-V");
+ if (!messages.isEmpty()) {
+ throw new ProfilerException(messages.toString());
+ }
+
+ try {
+ sampleFrequency = set.valueOf(optFrequency);
+ } catch (OptionException e) {
+ throw new ProfilerException(e.getMessage());
+ }
+ }
+
+ @Override
+ public void beforeTrial(BenchmarkParams params) {
+ super.beforeTrial(params);
+ }
+
+ @Override
+ public Collection<? extends Result> afterTrial(BenchmarkResult br,
long pid, File stdOut, File stdErr) {
+ if (pid == 0) {
+ throw new IllegalStateException("DTrace needs the forked VM
PID, but it is not initialized");
+ }
+
+ Collection<String> messages = Utils.destroy(dtraceProcess);
+ if (!messages.isEmpty()) {
+ throw new IllegalStateException(messages.toString());
+ }
+
+ this.pid = String.valueOf(pid);
+ return super.afterTrial(br, pid, stdOut, stdErr);
+ }
+
+ @Override
+ public Collection<String> addJVMInvokeOptions(BenchmarkParams params) {
+ dtraceProcess = Utils.runAsync("sudo", "dtrace", "-n", "profile-"
+ sampleFrequency +
+ " /arg1/ { printf(\"%d 0x%lx %d\", pid, arg1,
timestamp); ufunc(arg1)}", "-o",
+ perfBinData.getAbsolutePath());
+ return Collections.emptyList();
+ }
+
+ @Override
+ public String getDescription() {
+ return "DTrace profile provider + PrintAssembly Profiler";
+ }
+
+ @Override
+ protected void addMyOptions(OptionParser parser) {
+ optFrequency = parser.accepts("frequency",
+ "Sampling frequency. This is synonymous to profile-#")
+
.withRequiredArg().ofType(Long.class).describedAs("freq").defaultsTo(1001L);
+ }
+
+ @Override
+ protected void parseEvents() {
+ // Do nothing because DTrace writes text output anyway
+ }
+
+ @Override
+ protected PerfEvents readEvents(double skipMs, double lenMs) {
+ long start = (long) skipMs;
+ long end = (long) (skipMs + lenMs);
+
+ try (FileReader fr = new FileReader(perfBinData.file());
+ BufferedReader reader = new BufferedReader(fr)) {
+
+ Deduplicator<MethodDesc> dedup = new Deduplicator<>();
+ Multimap<MethodDesc, Long> methods = new HashMultimap<>();
+ Multiset<Long> events = new TreeMultiset<>();
+
+ long dtraceTimestampBase = 0L;
+ String line;
+ while ((line = reader.readLine()) != null) {
+
+ // Filter out DTrace misc
+ if (!line.contains(":profile")) {
+ continue;
+ }
+
+ line = line.trim();
+ line = line.substring(line.indexOf(":profile"));
+ String[] splits = line.split(" ", 5);
+ String sampledPid = splits[1];
+
+ if (!sampledPid.equals(pid)) {
+ continue;
+ }
+
+ // Sometimes DTrace ufunc fails and gives no information
about symbols
+ if (splits.length < 4) {
+ continue;
+ }
+
+ long timestamp = Long.valueOf(splits[3]);
+ if (dtraceTimestampBase == 0) {
+ // Use first event timestamp as base for time
comparison
+ dtraceTimestampBase = timestamp;
+ continue;
+ }
+
+ long elapsed = timestamp - dtraceTimestampBase;
+ long elapsedMs = TimeUnit.NANOSECONDS.toMillis(elapsed);
+
+ if (elapsedMs < start || elapsedMs > end) {
+ continue;
+ }
+
+ long address = Long.decode(splits[2]);
+ events.add(address);
+
+ String methodLine = splits[4];
+ // JIT-compiled code has address instead of symbol
information
+ if (methodLine.startsWith("0x")) {
+ continue;
+ }
+
+ String symbol = "[unknown]";
+ String[] methodSplit = methodLine.split("`");
+ String library = methodSplit[0];
+ if ("".equals(library)) {
+ library = "[unknown]";
+ }
+
+ if (methodSplit.length == 2) {
+ symbol = methodSplit[1];
+ }
+
+ methods.put(dedup.dedup(MethodDesc.nativeMethod(symbol,
library)), address);
+ }
+
+ IntervalMap<MethodDesc> methodMap = new IntervalMap<>();
+ for (MethodDesc md : methods.keys()) {
+ Collection<Long> longs = methods.get(md);
+ methodMap.add(md, Utils.min(longs), Utils.max(longs));
+ }
+
+ Map<String, Multiset<Long>> allEvents = new TreeMap<>();
+ assert this.events.size() == 1;
+ allEvents.put(this.events.get(0), events);
+ return new PerfEvents(this.events, allEvents, methodMap);
+
+ } catch (IOException e) {
+ return new PerfEvents(events);
+ }
+
+ }
+
+ @Override
+ protected String perfBinaryExtension() {
+ // DTrace produces human-readable txt
+ return ".txt";
+ }
+}
diff -r 1ddf31f810a3
jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java
--- a/jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java Fri
Sep 22 18:11:47 2017 +0200
+++ b/jmh-core/src/main/java/org/openjdk/jmh/profile/ProfilerFactory.java Sat
Jan 20 22:38:48 2018 +0300
@@ -27,7 +27,6 @@
import org.openjdk.jmh.runner.options.ProfilerConfig;
import java.io.PrintStream;
-import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
@@ -178,6 +177,7 @@
BUILT_IN.put("perfnorm", LinuxPerfNormProfiler.class);
BUILT_IN.put("perfasm", LinuxPerfAsmProfiler.class);
BUILT_IN.put("xperfasm", WinPerfAsmProfiler.class);
+ BUILT_IN.put("dtraceasm", DTraceAsmProfiler.class);
BUILT_IN.put("pauses", PausesProfiler.class);
BUILT_IN.put("safepoints", SafepointsProfiler.class);
}
diff -r 1ddf31f810a3 jmh-core/src/main/java/org/openjdk/jmh/util/Utils.java
--- a/jmh-core/src/main/java/org/openjdk/jmh/util/Utils.java Fri Sep 22
18:11:47 2017 +0200
+++ b/jmh-core/src/main/java/org/openjdk/jmh/util/Utils.java Sat Jan 20
22:38:48 2018 +0300
@@ -446,6 +446,31 @@
return messages;
}
+ public static Process runAsync(String... cmd) {
+ try {
+ return new ProcessBuilder(cmd).start();
+ } catch (IOException ex) {
+ throw new IllegalStateException(ex);
+ }
+ }
+
+ public static Collection<String> destroy(Process process) {
+ Collection<String> messages = new ArrayList<>();
+ try {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ process.destroy();
+ int exitCode = process.waitFor();
+ if (exitCode == 0) {
+ return Collections.emptyList();
+ }
+
+ messages.add(baos.toString());
+ return messages;
+ } catch (InterruptedException e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
public static Collection<String> runWith(List<String> cmd) {
Collection<String> messages = new ArrayList<>();
try {
diff -r 1ddf31f810a3
jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_35_Profilers.java
---
a/jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_35_Profilers.java
Fri
Sep 22 18:11:47 2017 +0200
+++
b/jmh-samples/src/main/java/org/openjdk/jmh/samples/JMHSample_35_Profilers.java
Sat
Jan 20 22:38:48 2018 +0300
@@ -33,6 +33,7 @@
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.profile.ClassloaderProfiler;
+import org.openjdk.jmh.profile.DTraceAsmProfiler;
import org.openjdk.jmh.profile.LinuxPerfProfiler;
import org.openjdk.jmh.profile.StackProfiler;
import org.openjdk.jmh.runner.Runner;
@@ -352,7 +353,7 @@
* $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
perfnorm -f 3 (Linux)
* $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
perfasm -f 1 (Linux)
* $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
xperfasm -f 1 (Windows)
- *
+ * $ java -jar target/benchmarks.jar JMHSample_35.*Atomic -prof
dtraceasm -f 1 (Mac OS X)
* b) Via the Java API:
* (see the JMH homepage for possible caveats when running from
IDE:
* http://openjdk.java.net/projects/code-tools/jmh/)
@@ -365,6 +366,7 @@
// .addProfiler(LinuxPerfNormProfiler.class)
// .addProfiler(LinuxPerfAsmProfiler.class)
// .addProfiler(WinPerfAsmProfiler.class)
+// .addProfiler(DTraceAsmProfiler.class)
.build();
new Runner(opt).run();
--
Best regards,
Tolstopyatov Vsevolod
On Wed, Jan 17, 2018 at 3:58 PM, Henri Tremblay <henri.tremblay at gmail.com>
wrote:
> Well found! It seems much better.
>
> Then, it still not found lines. I thought JMH was adding the correct
> PrintAssembly flags when forking. But the issue seems to be similar with
> the warning I had for perfasm.
>
> ERROR: No address lines detected in assembly capture, make sure your JDK
> is PrintAssembly-enabled:
> https://wiki.openjdk.java.net/display/HotSpot/PrintAssembly
>
> Perf output processed (skipped 2.531 seconds):
> Column 1: sampled_pc (11033 events)
>
> Hottest code regions (>10.00% "sampled_pc" events):
>
> ....[Hottest Region 1]............................
> ..................................................
> [unknown], [unknown] (231 bytes)
>
> <no assembly is recorded, native region>
> ............................................................
> ........................................
> 22.54% <total for region 1>
>
> ....[Hottest Region 2]............................
> ..................................................
> [unknown], [unknown] (134 bytes)
>
> <no assembly is recorded, native region>
> ............................................................
> ........................................
> 11.31% <total for region 2>
>
> ....[Hottest Region 3]............................
> ..................................................
> [unknown], [unknown] (41 bytes)
>
> <no assembly is recorded, native region>
> ............................................................
> ........................................
> 10.39% <total for region 3>
>
> ....[Hottest Region 4]............................
> ..................................................
> [unknown], [unknown] (417 bytes)
>
> <no assembly is recorded, native region>
> ............................................................
> ........................................
> 10.27% <total for region 4>
>
> ....[Hottest Regions]....................................................
> ...........................
> 22.54% [unknown] [unknown] (231 bytes)
> 11.31% [unknown] [unknown] (134 bytes)
> 10.39% [unknown] [unknown] (41 bytes)
> 10.27% [unknown] [unknown] (417 bytes)
> 8.45% [unknown] [unknown] (396 bytes)
> 5.71% [unknown] [unknown] (43 bytes)
> 5.66% [unknown] [unknown] (55 bytes)
> 5.45% [unknown] [unknown] (84 bytes)
> 4.17% [unknown] [unknown] (209 bytes)
> 2.47% [unknown] [unknown] (71 bytes)
> 2.12% [unknown] [unknown] (28 bytes)
> 2.04% [unknown] [unknown] (28 bytes)
> 1.63% [unknown] [unknown] (170 bytes)
> 0.95% [unknown] [unknown] (86 bytes)
> 0.89% [unknown] [unknown] (5 bytes)
> 0.83% [unknown] [unknown] (127 bytes)
> 0.81% [unknown] [unknown] (0 bytes)
> 0.55% [unknown] [unknown] (46 bytes)
> 0.44% [unknown] [unknown] (71 bytes)
> 0.41% [unknown] [unknown] (257 bytes)
> 2.91% <...other 73 warm regions...>
> ............................................................
> ........................................
> 99.99% <totals>
>
> ....[Hottest Methods (after inlining)]....................
> ..........................................
> 99.99% [unknown] [unknown]
> ............................................................
> ........................................
> 99.99% <totals>
>
> ....[Distribution by Source].......................
> .................................................
> 99.99% [unknown]
> ............................................................
> ........................................
> 99.99% <totals>
>
>
>
> # Run complete. Total time: 00:00:18
>
> Benchmark (implementation)
> (readRatio) (tableSize) Mode Cnt Score Error Units
> ConcurrentMapThroughput.randomGetPutRemove NonBlockingHashMap
> 50 100000 thrpt 6 22470630.498 ± 3730613.043 ops/s
> ConcurrentMapThroughput.randomGetPutRemove:·asm NonBlockingHashMap
> 50 100000 thrpt NaN ---
> ConcurrentMapThroughput.randomGetPutRemove ConcurrentHashMap
> 50 100000 thrpt 6 23411000.106 ± 1960758.197 ops/s
> ConcurrentMapThroughput.randomGetPutRemove:·asm ConcurrentHashMap
> 50 100000 thrpt NaN ---
>
> On 17 January 2018 at 07:20, Aleksey Shipilev <shade at redhat.com> wrote:
>
>> On 01/14/2018 04:24 PM, Vsevolod Tolstopyatov wrote:
>> > I have limited access to different versions of Mac OS X, but it seems
>> that in some minor updates
>> > DTrace works with SIP enabled.
>> > So as solution I'd suggest to check SIP status on profiler start (via
>> "csrutil status") and print
>> > warning if it's enabled or just clarify it in javadoc. It's up to
>> Alexey to decide what approach is
>> > preferable in JMH
>>
>> So, perf* profiler print warning messages when then number of samples is
>> suspiciously low. I think
>> dtraceasm profiler should do the same, and clearly say what the user is
>> supposed to do:
>>
>> WARNING: The perf event count is suspiciously low (" + sum + "). The
>> performance data might be
>> inaccurate or misleading. Try to do the profiling again, or tune up the
>> sampling frequency.
>> With some profilers on Mac OS X, System Integrity Protection (SIP) may
>> prevent profiling.
>> In such case, temporarily disabling SIP with 'csrutil disable' might
>> help.
>>
>> I'll add this to the patch myself.
>>
>> I can push this without my own testing, hoping that external people
>> validated this. OpenJDK rules
>> require the patch to be hosted on OpenJDK infra to get accepted, so,
>> Vsevolod, can you please post
>> the recent version of the patch inline here?
>>
>> Thanks
>> -Aleksey
>>
>>
>>
>
More information about the jmh-dev
mailing list