RFR: 8152641: Plugin to generate BMH$Species classes ahead-of-time

Peter Levart peter.levart at gmail.com
Wed Mar 30 07:53:43 UTC 2016


Hi Claes,

Sorry, I found a flaw in the benchmark (the regex pattern to split 
comma-separated string into substrings was wrong). What the benchmark 
did was compare the overheads of a single lookup of a longer class name 
containing commas. Here's the corrected result of overheads of 5 
unsuccessful lookups:

Benchmark (generate)                       (lookup)  Mode  Cnt      
Score Error  Units
ClassForNameBench.classForName          LL,LLL,LLLL,LLLLL,LLLLLL 
LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10  29627.141 ± 567.427  ns/op
ClassForNameBench.classForNameInModule  LL,LLL,LLLL,LLLLL,LLLLLL 
LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10   1073.256 ±  23.794  ns/op
ClassForNameBench.hashSetContains       LL,LLL,LLLL,LLLLL,LLLLLL 
LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10     33.022 ±   0.066  ns/op
ClassForNameBench.switchStatement       LL,LLL,LLLL,LLLLL,LLLLLL 
LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10     38.498 ±   5.842  ns/op

...overheads are a little bigger (x5 approx.).


Here's the corrected benchmark:


package jdk.test;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.io.Serializable;
import java.lang.invoke.MethodHandle;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;

@BenchmarkMode(Mode.AverageTime)
@Fork(value = 1, warmups = 0)
@Warmup(iterations = 10)
@Measurement(iterations = 10)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class ClassForNameBench {

     static final String BMH = "java/lang/invoke/BoundMethodHandle";
     static final String SPECIES_PREFIX_NAME = "Species_";
     static final String SPECIES_PREFIX_PATH = BMH + "$" + 
SPECIES_PREFIX_NAME;

     @Param({"LL,LLL,LLLL,LLLLL,LLLLLL"})
     public String generate;

     @Param({"LLI,LLLI,LLLLI,LLLLLI,LLLLLLI"})
     public String lookup;

     private String[] generatedTypes;
     private String[] lookedUpTypes;
     private Set<String> generatedNames;
     private String[] lookedUpNames;

     @Setup(Level.Trial)
     public void setup() {
         generatedTypes = generate.trim().split("\\s*,\\s*");
         lookedUpTypes = lookup.trim().split("\\s*,\\s*");
         generatedNames = Stream.of(generatedTypes)
                                .map(types -> SPECIES_PREFIX_PATH + 
shortenSignature(types))
                                .collect(Collectors.toSet());
         lookedUpNames = Stream.of(lookedUpTypes)
                               .map(types -> SPECIES_PREFIX_PATH + 
shortenSignature(types))
                               .toArray(String[]::new);
     }

     @Benchmark
     public void classForName(Blackhole bh) {
         for (String name : lookedUpNames) {
             try {
                 bh.consume(Class.forName(name, false, 
MethodHandle.class.getClassLoader()));
             } catch (ClassNotFoundException e) {
                 bh.consume(e);
             }
         }
     }

     @Benchmark
     public void classForNameInModule(Blackhole bh) {
         for (String name : lookedUpNames) {
             bh.consume(Class.forName(MethodHandle.class.getModule(), 
name));
         }
     }

     @Benchmark
     public void hashSetContains(Blackhole bh) {
         for (String name : lookedUpNames) {
             bh.consume(generatedNames.contains(name));
         }
     }

     @Benchmark
     public void switchStatement(Blackhole bh) {
         for (String types : lookedUpTypes) {
             bh.consume(getBMHSwitch(types));
         }
     }

     static Class<?> getBMHSwitch(String types) {
         // should be in sync with @Param generate above...
         switch (types) {
             case "LL": return Object.class;
             case "LLL": return Serializable.class;
             case "LLLL": return Iterator.class;
             case "LLLLL": return Throwable.class;
             case "LLLLLL": return String.class;
             default: return null;
         }
     }

     // copied from non-public LambdaForm
     static String shortenSignature(String signature) {
         // Hack to make signatures more readable when they show up in 
method names.
         final int NO_CHAR = -1, MIN_RUN = 3;
         int c0, c1 = NO_CHAR, c1reps = 0;
         StringBuilder buf = null;
         int len = signature.length();
         if (len < MIN_RUN) return signature;
         for (int i = 0; i <= len; i++) {
             // shift in the next char:
             c0 = c1; c1 = (i == len ? NO_CHAR : signature.charAt(i));
             if (c1 == c0) { ++c1reps; continue; }
             // shift in the next count:
             int c0reps = c1reps; c1reps = 1;
             // end of a  character run
             if (c0reps < MIN_RUN) {
                 if (buf != null) {
                     while (--c0reps >= 0)
                         buf.append((char) c0);
                 }
                 continue;
             }
             // found three or more in a row
             if (buf == null)
                 buf = new StringBuilder().append(signature, 0, i - c0reps);
             buf.append((char) c0).append(c0reps);
         }
         return (buf == null) ? signature : buf.toString();
     }
}


Regards, Peter



On 03/30/2016 09:40 AM, Peter Levart wrote:
> Hi Claes,
>
> On 03/30/2016 01:03 AM, Claes Redestad wrote:
>> Hi Peter, Mandy,
>>
>> On 2016-03-26 12:47, Peter Levart wrote:
>>>
>>> Comparing this with proposed code from webrev:
>>>
>>>  493                         try {
>>>  494                             return (Class<? extends 
>>> BoundMethodHandle>)
>>>  495 Class.forName("java.lang.invoke.BoundMethodHandle$Species_" + 
>>> LambdaForm.shortenSignature(types));
>>>  496                         } catch (ClassNotFoundException cnf) {
>>>  497                             // Not pregenerated, generate the class
>>>  498                             return generateConcreteBMHClass(types);
>>>  499                         }
>>>
>>>
>>> ...note that the function passed to CLASS_CACHE.computeIfAbsent is 
>>> executed just once per distinct 'types' argument. Even if you put 
>>> the generated switch between a call to getConcreteBMHClass and 
>>> CLASS_CACHE.computeIfAbsent, getConcreteBMHClass(String types) is 
>>> executed just once per distinct 'types' argument (except in rare 
>>> occasions when VM can not initialize the loaded class).
>>>
>>> In this respect a successful Class.forName() is not any worse than 
>>> static resolving. It's just that unsuccessful Class.forName() 
>>> represents some overhead for classes that are not pre-generated. So 
>>> an alternative way to get rid of that overhead is to have a HashSet 
>>> of 'types' strings for pre-generated classes at hand in order to 
>>> decide whether to call Class.forName or generateConcreteBMHClass.
>>>
>>> What's easier to support is another question.
>>>
>>> Regards, Peter
>>
>> to have something to compare with I built a version which, like you 
>> suggest,
>> generates a HashSet<String> with the set of generated classes here:
>>
>> http://cr.openjdk.java.net/~redestad/8152641/webrev.02/
>>
>> This adds a fair bit of complexity to the plugin and requires we add 
>> a nested
>> class in BoundMethodHandle that we can replace. Using the anonymous
>> compute Function for this seems like the best choice for this.
>
> ...what I had in mind as alternative to a pregenerated class with a 
> switch was a simple textual resource file, generated by plugin, 
> read-in by BMH into a HashSet. No special-purpose class generation and 
> much less complexity for the plugin.
>
>>
>> I've not been able to measure any statistical difference in real 
>> startup terms,
>> though, and not figured out a smart way to benchmark the overhead of the
>> CNFE in relation to the class generation (my guess it adds a fraction 
>> to the
>> total cost) and since this adds ever so little footprint and an extra 
>> lookup to the
>> fast path it would seem that this is the wrong trade-off to do here.
>
> Yes, perhaps it would be best to just use Class.forName(module, 
> className) instead. I have created a little benchmark to compare 
> overheads (just overheads) of unsuccessful lookups for pregenerated 
> classes (a situation where a BMH class is requested that has not been 
> pregenerated) and here's the result for overhead of 5 unsuccessfull 
> lookups:
>
> Benchmark (generate)                       (lookup)  Mode  Cnt     
> Score Error  Units
> ClassForNameBench.classForName          LL,LLL,LLLL,LLLLL,LLLLLL 
> LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10  6800.878 ± 421.424 ns/op
> ClassForNameBench.classForNameInModule  LL,LLL,LLLL,LLLLL,LLLLLL 
> LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10   209.574 ±   2.114 ns/op
> ClassForNameBench.hashSetContains       LL,LLL,LLLL,LLLLL,LLLLLL 
> LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10     6.813 ±   0.317 ns/op
> ClassForNameBench.switchStatement       LL,LLL,LLLL,LLLLL,LLLLLL 
> LLI,LLLI,LLLLI,LLLLLI,LLLLLLI  avgt   10     6.601 ±   0.061 ns/op
>
> ...compared to runtime BMH class generation and loading this is really 
> a very minor overhead. I would just use Class.forName(module, 
> className) and reduce the complexity of the plugin.
>
> What do you think?
>
>
> Here's the benchmark:
>
> package jdk.test;
>
> import org.openjdk.jmh.annotations.*;
> import org.openjdk.jmh.infra.Blackhole;
>
> import java.io.Serializable;
> import java.lang.invoke.MethodHandle;
> import java.util.Iterator;
> import java.util.Set;
> import java.util.concurrent.TimeUnit;
> import java.util.stream.Collectors;
> import java.util.stream.Stream;
>
> @BenchmarkMode(Mode.AverageTime)
> @Fork(value = 1, warmups = 0)
> @Warmup(iterations = 10)
> @Measurement(iterations = 10)
> @OutputTimeUnit(TimeUnit.NANOSECONDS)
> @State(Scope.Thread)
> public class ClassForNameBench {
>
>     static final String BMH = "java/lang/invoke/BoundMethodHandle";
>     static final String SPECIES_PREFIX_NAME = "Species_";
>     static final String SPECIES_PREFIX_PATH = BMH + "$" + 
> SPECIES_PREFIX_NAME;
>
>     @Param({"LL,LLL,LLLL,LLLLL,LLLLLL"})
>     public String generate;
>
>     @Param({"LLI,LLLI,LLLLI,LLLLLI,LLLLLLI"})
>     public String lookup;
>
>     private String[] generatedTypes;
>     private String[] lookedUpTypes;
>     private Set<String> generatedNames;
>     private String[] lookedUpNames;
>
>     @Setup(Level.Trial)
>     public void setup() {
>         generatedTypes = generate.trim().split("\\s+,\\s+");
>         lookedUpTypes = lookup.trim().split("\\s+,\\s+");
>         generatedNames = Stream.of(generatedTypes)
>                                .map(types -> SPECIES_PREFIX_PATH + 
> shortenSignature(types))
>                                .collect(Collectors.toSet());
>         lookedUpNames = Stream.of(lookedUpTypes)
>                               .map(types -> SPECIES_PREFIX_PATH + 
> shortenSignature(types))
>                               .toArray(String[]::new);
>     }
>
>     @Benchmark
>     public void classForName(Blackhole bh) {
>         for (String name : lookedUpNames) {
>             try {
>                 bh.consume(Class.forName(name, false, 
> MethodHandle.class.getClassLoader()));
>             } catch (ClassNotFoundException e) {
>                 bh.consume(e);
>             }
>         }
>     }
>
>     @Benchmark
>     public void classForNameInModule(Blackhole bh) {
>         for (String name : lookedUpNames) {
> bh.consume(Class.forName(MethodHandle.class.getModule(), name));
>         }
>     }
>
>     @Benchmark
>     public void hashSetContains(Blackhole bh) {
>         for (String name : lookedUpNames) {
>             bh.consume(generatedNames.contains(name));
>         }
>     }
>
>     @Benchmark
>     public void switchStatement(Blackhole bh) {
>         for (String types : lookedUpTypes) {
>             bh.consume(getBMHSwitch(types));
>         }
>     }
>
>     static Class<?> getBMHSwitch(String types) {
>         // should be in sync with @Param generate above...
>         switch (types) {
>             case "LL": return Object.class;
>             case "LLL": return Serializable.class;
>             case "LLLL": return Iterator.class;
>             case "LLLLL": return Throwable.class;
>             case "LLLLLL": return String.class;
>             default: return null;
>         }
>     }
>
>     // copied from non-public LambdaForm
>     static String shortenSignature(String signature) {
>         // Hack to make signatures more readable when they show up in 
> method names.
>         final int NO_CHAR = -1, MIN_RUN = 3;
>         int c0, c1 = NO_CHAR, c1reps = 0;
>         StringBuilder buf = null;
>         int len = signature.length();
>         if (len < MIN_RUN) return signature;
>         for (int i = 0; i <= len; i++) {
>             // shift in the next char:
>             c0 = c1; c1 = (i == len ? NO_CHAR : signature.charAt(i));
>             if (c1 == c0) { ++c1reps; continue; }
>             // shift in the next count:
>             int c0reps = c1reps; c1reps = 1;
>             // end of a  character run
>             if (c0reps < MIN_RUN) {
>                 if (buf != null) {
>                     while (--c0reps >= 0)
>                         buf.append((char) c0);
>                 }
>                 continue;
>             }
>             // found three or more in a row
>             if (buf == null)
>                 buf = new StringBuilder().append(signature, 0, i - 
> c0reps);
>             buf.append((char) c0).append(c0reps);
>         }
>         return (buf == null) ? signature : buf.toString();
>     }
> }
>
>
>
> Regards, Peter
>
>>
>> All-in-all I lean towards moving forward with the first, simpler 
>> revision of this
>> patch[1] and then evaluate if webrev.02 or a String-switch+static 
>> resolve
>> as a follow-up.
>>
>> A compromise would be to keep the SpeciesLookup class introduced here
>> to allow removing all overhead in case the plugin is disabled.
>>
>> Mandy: I've not found any test (under jdk/test/tools/jlink or 
>> elsewhere) which
>> has to be updated when adding a plugin like this.
>>
>> Thanks!
>>
>> /Claes
>>
>> [1] http://cr.openjdk.java.net/~redestad/8152641/webrev.01/ 
>




More information about the core-libs-dev mailing list