RFR: JDK-8278135: Remove un-necessary null-check for get-static in c2
Volker Simonis
simonis at openjdk.java.net
Fri Dec 3 21:24:22 UTC 2021
On Thu, 2 Dec 2021 11:20:48 GMT, 王超 <duke at openjdk.java.net> wrote:
> When run the following test, lots of un-necessary null check deoptimization happen.
>
> Small Test:
>
> public class CodeDependenciesTest {
> private Object obj;
> private String[] strs;
> private Object[][] objs;
> private static Class clzOne;
> private static Class clzTwo;
> public static void main(String[] args) throws Exception {
> CodeDependenciesTest codeDependenciesTest = new CodeDependenciesTest();
> codeDependenciesTest.obj = new String("1");
> for (int i = 0; i < 300000; i++) {
> codeDependenciesTest.foo();
> }
> }
>
> public void foo() {
> objs = new Object[10][10];
> for (int i = 0; i < 10; i++) {
> for (int j = 0; j < 10; j++) {
> objs[i][j] = new Object();
> }
> }
> clzOne = InvokeTest.class;
> clzTwo = clzOne;
> }
>
> static class InvokeTest {
> public void bar(String i) {
> try {
> Thread.sleep(Long.valueOf(i));
> } catch (Exception e) {
> e.printStackTrace();
> }
> }
> }
> }
>
>
> The deoptimization log generated by `-XX:+TraceDeoptimization` is:
>
> Uncommon trap bci=63 pc=0x00007f0eafbe1e38, relative_pc=0x00000000000005f8, method=CodeDependenciesTest.foo()V, debug_id=0
> Uncommon trap occurred in CodeDependenciesTest::foo compiler=c2 compile_id=288 (@0x00007f0eafbe1e38) thread=20285 reason=null_assert_or_unreached0 action=make_not_entrant unloaded_class_index=-1 debug_id=0
> DEOPT UNPACKING thread 0x00007f0ec0028190 vframeArray 0x00007f0ec02348a0 mode 2
> {method} {0x00007f0e7c4004f0} 'foo' '()V' in 'CodeDependenciesTest' - putstatic @ bci 63 sp = 0x00007f0ec8a317c8
> Uncommon trap bci=63 pc=0x00007f0eafbe0f34, relative_pc=0x0000000000000514, method=CodeDependenciesTest.foo()V, debug_id=0
> Uncommon trap occurred in CodeDependenciesTest::foo compiler=c2 compile_id=287 (@0x00007f0eafbe0f34) thread=20285 reason=null_assert_or_unreached0 action=make_not_entrant unloaded_class_index=-1 debug_id=0
> DEOPT UNPACKING thread 0x00007f0ec0028190 vframeArray 0x00007f0ec0235e40 mode 2
> {method} {0x00007f0e7c4004f0} 'foo' '()V' in 'CodeDependenciesTest' - putstatic @ bci 63 sp = 0x00007f0ec8a317c8
>
>
> The corresponding opto assembly is,
>
> 230 B20: # out( B56 B21 ) <- in( B58 B43 B41 B19 ) Freq: 0.999369
> 230 movl [RBX + #112 (8-bit)], narrowoop: java/lang/Class:exact * # compressed ptr ! Field: CodeDependenciesTest.clzOne
> 237 movq R10, RBX # ptr -> long
> 23a movq RBP, java/lang/Class:exact * # ptr
> 244 movq R11, RBP # ptr -> long
> 247 xorq R11, R10 # long
> 24a shrq R11, #22
> 24e testq R11, R11
> 251 je B56 P=0.000001 C=-1.000000
>
> 257 B21: # out( B56 B22 ) <- in( B20 ) Freq: 0.999368
> 257 shrq R10, #9
> 25b addq R14, R10 # ptr
> 25e cmpb [R14], #4
> 262 je B56 P=0.000001 C=-1.000000
>
> 5ed B56: # out( N780 ) <- in( B55 B21 B20 ) Freq: 3.02464e-06
> 5ed movl RSI, #-20 # int
> nop # 1 bytes pad for loops and calls
> 5f3 call,static wrapper for: uncommon_trap(reason='null_assert_or_unreached0' action='make_not_entrant' debug_id='0')
> # CodeDependenciesTest::foo @ bci:63 (line 23) L[0]=_ L[1]=_ L[2]=_ STK[0]=RBP
> # OopMap {rbp=Oop off=1528/0x5f8}
> 5f8 stop # ShouldNotReachHere
>
>
> C2 tries to generate a null-check for the get-static in `clzTwo = clzOne;`, because it thinks that ciKlass of `java.lang.Class` is not loaded.
>
> The ciKlass of `java.lang.Class` is generated by the following stack trace,
>
> (gdb) bt
> #0 SystemDictionary::find_instance_klass (class_name=0x800481080, class_loader=..., protection_domain=...) at /data/openjdk/jdk_dev/src/hotspot/share/classfile/systemDictionary.cpp:778
> #1 0x00007ffff610769d in SystemDictionary::find_instance_or_array_klass (class_name=0x800481080, class_loader=..., protection_domain=...) at /data/openjdk/jdk_dev/src/hotspot/share/classfile/systemDictionary.cpp:813
> #2 0x00007ffff610a55c in SystemDictionary::find_constrained_instance_or_array_klass (current=0x7ffff020a5e0, class_name=0x800481080, class_loader=...) at /data/openjdk/jdk_dev/src/hotspot/share/classfile/systemDictionary.cpp:1760
> #3 0x00007ffff55f1f29 in ciEnv::get_klass_by_name_impl (this=0x7fffc12066c0, accessing_klass=0x7fff8c0f6278, cpool=..., name=0x7ffff00334f8, require_local=false) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciEnv.cpp:519
> #4 0x00007ffff55f1d81 in ciEnv::get_klass_by_name_impl (this=0x7fffc12066c0, accessing_klass=0x7fff8c0f6278, cpool=..., name=0x7fff8c004518, require_local=false) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciEnv.cpp:488
> #5 0x00007ffff55f2466 in ciEnv::get_klass_by_index_impl (this=0x7fffc12066c0, cpool=..., index=34, is_accessible=@0x7fffc1204540: 112, accessor=0x7fff8c0f6278) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciEnv.cpp:611
> #6 0x00007ffff55f2632 in ciEnv::get_klass_by_index (this=0x7fffc12066c0, cpool=..., index=34, is_accessible=@0x7fffc1204540: 112, accessor=0x7fff8c0f6278) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciEnv.cpp:658
> #7 0x00007ffff55fb1bd in ciField::ciField (this=0x7fff8c0f9208, klass=0x7fff8c0f6278, index=65542) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciField.cpp:101
> #8 0x00007ffff55f344f in ciEnv::get_field_by_index_impl (this=0x7fffc12066c0, accessor=0x7fff8c0f6278, index=65542) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciEnv.cpp:798
> #9 0x00007ffff55f3506 in ciEnv::get_field_by_index (this=0x7fffc12066c0, accessor=0x7fff8c0f6278, index=65542) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciEnv.cpp:811
> #10 0x00007ffff56284ec in ciBytecodeStream::get_field (this=0x7fffc1204850, will_link=@0x7fffc12046cf: false) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciStreams.cpp:274
> #11 0x00007ffff562d10c in ciTypeFlow::StateVector::do_putstatic (this=0x7fff8c1bd078, str=0x7fffc1204850) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciTypeFlow.cpp:798
> #12 0x00007ffff562e960 in ciTypeFlow::StateVector::apply_one_bytecode (this=0x7fff8c1bd078, str=0x7fffc1204850) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciTypeFlow.cpp:1457
> #13 0x00007ffff563218d in ciTypeFlow::flow_block (this=0x7fff8c0f7a50, block=0x7fff8c0f8ec0, state=0x7fff8c1bd078, jsrs=0x7fff8c1bd0b8) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciTypeFlow.cpp:2364
> #14 0x00007ffff563332d in ciTypeFlow::df_flow_types (this=0x7fff8c0f7a50, start=0x7fff8c0f80a8, do_flow=true, temp_vector=0x7fff8c1bd078, temp_set=0x7fff8c1bd0b8) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciTypeFlow.cpp:2675
> #15 0x00007ffff563361f in ciTypeFlow::flow_types (this=0x7fff8c0f7a50) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciTypeFlow.cpp:2725
> #16 0x00007ffff5634081 in ciTypeFlow::do_flow (this=0x7fff8c0f7a50) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciTypeFlow.cpp:2886
> #17 0x00007ffff5605687 in ciMethod::get_flow_analysis (this=0x7fff8c0f6340) at /data/openjdk/jdk_dev/src/hotspot/share/ci/ciMethod.cpp:327
> #18 0x00007ffff5499d34 in InlineTree::check_can_parse (callee=0x7fff8c0f6340) at /data/openjdk/jdk_dev/src/hotspot/share/opto/bytecodeInfo.cpp:535
> #19 0x00007ffff55a0806 in CallGenerator::for_osr (m=0x7fff8c0f6340, osr_bci=22) at /data/openjdk/jdk_dev/src/hotspot/share/opto/callGenerator.cpp:299
> #20 0x00007ffff56aa2d9 in Compile::Compile (this=0x7fffc1205900, ci_env=0x7fffc12066c0, target=0x7fff8c0f6340, osr_bci=22, options=..., directive=0x7ffff01588a0) at /data/openjdk/jdk_dev/src/hotspot/share/opto/compile.cpp:687
> #21 0x00007ffff559da3e in C2Compiler::compile_method (this=0x7ffff0209f40, env=0x7fffc12066c0, target=0x7fff8c0f6340, entry_bci=22, install_code=true, directive=0x7ffff01588a0) at /data/openjdk/jdk_dev/src/hotspot/share/opto/c2compiler.cpp:108
> #22 0x00007ffff56c7f6e in CompileBroker::invoke_compiler_on_method (task=0x7ffff02322d0) at /data/openjdk/jdk_dev/src/hotspot/share/compiler/compileBroker.cpp:2291
> #23 0x00007ffff56c6aed in CompileBroker::compiler_thread_loop () at /data/openjdk/jdk_dev/src/hotspot/share/compiler/compileBroker.cpp:1966
> #24 0x00007ffff56e6f9f in CompilerThread::thread_entry (thread=0x7ffff020a5e0, __the_thread__=0x7ffff020a5e0) at /data/openjdk/jdk_dev/src/hotspot/share/compiler/compilerThread.cpp:59
> #25 0x00007ffff614a42f in JavaThread::thread_main_inner (this=0x7ffff020a5e0) at /data/openjdk/jdk_dev/src/hotspot/share/runtime/thread.cpp:1297
> #26 0x00007ffff614a2c5 in JavaThread::run (this=0x7ffff020a5e0) at /data/openjdk/jdk_dev/src/hotspot/share/runtime/thread.cpp:1280
> #27 0x00007ffff6147b08 in Thread::call_run (this=0x7ffff020a5e0) at /data/openjdk/jdk_dev/src/hotspot/share/runtime/thread.cpp:358
> #28 0x00007ffff5eafa4f in thread_native_entry (thread=0x7ffff020a5e0) at /data/openjdk/jdk_dev/src/hotspot/os/linux/os_linux.cpp:705
> #29 0x00007ffff779cea5 in start_thread (arg=0x7fffc1207700) at pthread_create.c:307
> #30 0x00007ffff72c19fd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
>
>
> When `CodeDependenciesTest.foo` is compiled, the classloader of the holder of this method is AppClassLoader, so it finds `java.lang.Class` in AppClassLoader, and finds nothing, so it thinks that `java.lang.Class` is not loaded, but `java.lang.Class` is a built-in class which is definitely loaded and initialized.
>
> The following patch is the first patch we implemented, it tries to find Klass in the parent classloader when the Klass can not be found in current classloader. But the patch has potential risk, because user-defined classloader may not follow the 'parent delegation' style of classloading.
>
> diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
> index e29b56a..3ceafc4 100644
> --- a/src/hotspot/share/ci/ciEnv.cpp
> +++ b/src/hotspot/share/ci/ciEnv.cpp
> @@ -514,11 +514,17 @@ ciKlass* ciEnv::get_klass_by_name_impl(ciKlass* accessing_klass,
> {
> ttyUnlocker ttyul; // release tty lock to avoid ordering problems
> MutexLocker ml(current, Compile_lock);
> - Klass* kls;
> - if (!require_local) {
> - kls = SystemDictionary::find_constrained_instance_or_array_klass(current, sym, loader);
> - } else {
> - kls = SystemDictionary::find_instance_or_array_klass(sym, loader, domain);
> + Klass* kls = NULL;
> + while (true) {
> + if (!require_local) {
> + kls = SystemDictionary::find_constrained_instance_or_array_klass(current, sym, loader);
> + } else {
> + kls = SystemDictionary::find_instance_or_array_klass(sym, loader, domain);
> + }
> + if (kls != NULL || loader() == NULL) {
> + break;
> + }
> + loader = Handle(current, java_lang_ClassLoader::parent(loader()));
> }
> found_klass = kls;
> }
>
>
> When the Klass of the field is not loaded, the generated 'null check' helps nothing, we think remove it is the right way to avoid the deoptmization.
@casparcwang , thanks for this PR! It's really a very interesting and tricky issue :)
What you actually see is not a deoptimization because of a null check, but rather the opposite, a deoptimization because of a not-null-check (i.e. `null_assert` is the opposit of `null_check` an asserts if a value is not null).
Let's look at a simplified version of your example:
public class CodeDependenciesSimple {
private static Class clzOne;
private static Class clzTwo;
public static void main(String[] args) throws Exception {
if (args.length == 1) {
clzOne = Object.class;
} else if (args.length == 2) {
clzOne = Class.forName("java.lang.Object");
}
for (int i = 0; i < 300000; i++) {
foo();
}
}
public static void foo() {
clzTwo = clzOne;
}
}
Calling this wiht `java CodeDependenciesSimple class` will reproduce your problem of continuous deoptimizations. The OptoAssembly looks as follows:
000 B1: # out( B3 B2 ) <- BLOCK HEAD IS JUNK Freq: 1
000 # stack bang (104 bytes)
pushq rbp # Save rbp
subq rsp, #16 # Create frame
00c movq R10, java/lang/Class:exact * # ptr
016 movq R11, [R10 + #176 (32-bit)] # ptr ! Field: CodeDependenciesSimple.clzOne
nop # 3 bytes pad for loops and calls
020 testq R11, R11 # ptr
023 jne,s B3 P=0,000001 C=-1,000000
025 B2: # out( N1 ) <- in( B1 ) Freq: 0,999999
025 movq [R10 + #184 (32-bit)], NULL # ptr ! Field: CodeDependenciesSimple.clzTwo
030 addq rsp, 16 # Destroy frame
popq rbp
cmpq rsp, poll_offset[r15_thread]
ja #safepoint_stub # Safepoint: poll for GC
042 ret
043 B3: # out( N1 ) <- in( B1 ) Freq: 1e-06
043 movl RSI, #-20 # int
048 movq RBP, R11 # spill
04b call,static wrapper for: uncommon_trap(reason='null_assert_or_unreached0' action='make_not_entrant' debug_id='0')
# CodeDependenciesSimple::foo @ bci:3 (line 15) STK[0]=RBP
# OopMap {rbp=Oop off=80/0x50}
050 stop # ShouldNotReachHere
As you can see, there's a check for `clzOne` being not zero at `0x020` and if `clzOne` is not zero (which will be true if you call the program with one parameter) you'll always jump to the uncommon trap in block B3.
If you call the program without arguments, the OptoAssembly of `foo()` will look exactly the same, but `clzOne` will be NULL (because it is not initialized) and you'll see no deoptimizations. This is a kind of C2 "optimization" because for NULL assignments, C2 doesn't have to prove that the field has the same type like object which is assigned to the field (the object is actually NULL).
I think all this weird behavior is really a corner case for class constants (i.e. `Object.class`) because they are directly translated into a `ldc java/lang/Object` bytecode and there's no need for the classloader which executes code to load and resolve the `java.lang.Class` class.
If you use ` Class.forName("java.lang.Object")` to initialize `clzOne` which is semantically the same (by executing `java CodeDependenciesSimple class forName`) the generated code changes and will look as follows:
000 B1: # out( N1 ) <- BLOCK HEAD IS JUNK Freq: 1
000 # stack bang (96 bytes)
pushq rbp # Save rbp
subq rsp, #16 # Create frame
00c movq R10, 0x00007fbdf0ade000 # ptr
016 movq R11, java/lang/Class:exact * # ptr
020 movq R8, [R11 + #176 (32-bit)] # ptr ! Field: CodeDependenciesSimple.clzOne
027 movq [R11 + #184 (32-bit)], R8 # ptr ! Field: CodeDependenciesSimple.clzTwo
02e movq R11, R11 # ptr -> long
02e shrq R11, #9
032 movb [R10 + R11], #0 # byte
037 addq rsp, 16 # Destroy frame
popq rbp
cmpq rsp, poll_offset[r15_thread]
ja #safepoint_stub # Safepoint: poll for GC
049 ret
```
As you see, there's no more uncommon trap in the code. This is because now that application class loader which executes ` Class.forName("java.lang.Object")` will trigger the loading of `java.lang.Class` and will be recorded as an initiating classloader for `java.lang.Class` (`java.lang.Class` will still be loaded, or has already been loaded to be more accurate, by the bootstrap class loader, which is recorded as the class' defining class loader).
Because the application class loader is an initiating classloader of `java.lang.Class`, the type flow analysis will correctly detect the type of the `clzOne` field and proove that it's the same as the type of the assigned object so there's no need for an uncommon trap any more.
I think this situation can only occur for fields of type `Class` because they can be assigned from the result of an `ldc <class_name>` instruction which prevents that the executing class loader triggers the loading of the corresponding class. By the way, `String` constants are also loaded with `ldc`, but the application class loader already initiates the loading of `java.lang.String` because it's in the argument list of the `main()` function so the problem can't be easily reproduced with `String` fields.
So to cut a long story short, I don't think that your proposed fix is correct. I have to think more about it, but maybe one approach for a fix could be to handle `java.lang.Class` special because it is always loaded in the bootstrap classloader during VM initialization.
I'll take another look at the problem next week :)
Have a nice weekend,
Volker
-------------
PR: https://git.openjdk.java.net/jdk/pull/6667
More information about the hotspot-compiler-dev
mailing list