Server compiler crash

Hiroshi Yamauchi yamauchi at google.com
Mon Sep 8 17:37:09 PDT 2008


Hi,

I encountered a server compiler crash which happens in a Google server.

Unfortunately, my attempt to create a small test for the crash wasn't
successful. If I extracted
the method from the server code, the crash didn't happen. Also, the
-Xcomp option did
not trigger the crash in the server. So, it seems to happen only when
inlining is applied and/or
profile data is used for compilation. But it happens consistently and
deterministically in the server.

The method that triggers this crash during its compilation has 132
lines of code (588 bytes)
and 4 loops (two of them are nested loops) without considering inlining.

I ran it with many JDK versions and found that it happens with 6u4
and every later version up to
the Mercurial master forest (from last Friday). It doesn't happen with
6u1 or 6u3.

The only JVM option that is critical to trigger this crash is
"-server". This crash can be suppressed
by -XX:-SplitIfBlocks or the patch below.

The following are more details about the crash.

The crash message:

# To suppress the following error report, specify this argument
# after -XX: or in .hotspotrc:  SuppressErrorAt=/loopnode.hpp:634
#
# A fatal error has been detected by the Java Runtime Environment:
#
#  Internal Error
(/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.hpp:634),
pid=20501, tid=3470654384
#  Error: assert(n != __null,"Bad immediate dominator info.")
#
# Java VM: OpenJDK Server VM (14.0-b03-fastdebug mixed mode linux-x86 )
# An error report file with more information is saved as:




The stack trace from gdb:

#0  0xffffe410 in __kernel_vsyscall ()
#1  0x4af1fc51 in raise () from /lib/tls/i686/cmov/libc.so.6
#2  0x4af21619 in abort () from /lib/tls/i686/cmov/libc.so.6
#3  0xf778aeb0 in os::abort (dump_core=4160507393) at
/home/yamauchi/ws/jdk7/hotspot/src/os/linux/vm/os_linux.cpp:1456
#4  0xf78f9d6e in VMError::report_and_die (this=0xcef4d2d0) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/utilities/vmError.cpp:824
#5  0xf74aa12b in report_assertion_failure () at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/utilities/debug.cpp:173
#6  0xf76fa612 in PhaseIdealLoop::idom_no_update (this=0xcef4e1ec,
d=0x8b51b30) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.hpp:634
#7  0xf76fa6b0 in PhaseIdealLoop::idom (this=0xcef4e1ec, d=0x8b51b30)
at /home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.hpp:644
#8  0xf7845f1c in PhaseIdealLoop::spinup (this=0xcef4e1ec,
iff_dom=0x8ba68b0, new_false=0x81db03c, new_true=0x81db180,
use_blk=0x8b51b30, def=0x81da760, cache=0xcef4d4b0) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/split_if.cpp:256
#9  0xf7846361 in PhaseIdealLoop::handle_use (this=0xcef4e1ec,
use=0x81d9fac, def=0x81da760, cache=0xcef4d4b0, region_dom=0x8ba68b0,
new_false=0x81db03c, new_true=0x81db180, old_false=0x8ba7e40,
old_true=0x8ba7dec) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/split_if.cpp:370
#10 0xf7846bc9 in PhaseIdealLoop::do_split_if (this=0xcef4e1ec,
iff=0x8ba7d94) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/split_if.cpp:512
#11 0xf770aaeb in PhaseIdealLoop::split_if_with_blocks_post
(this=0xcef4e1ec, n=0x8ba7cfc) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopopts.cpp:803
#12 0xf770b441 in PhaseIdealLoop::split_if_with_blocks
(this=0xcef4e1ec, visited=0xcef4da14, nstack=0xcef4d9ec) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopopts.cpp:978
#13 0xf7705737 in PhaseIdealLoop::PhaseIdealLoop (this=0xcef4e1ec,
igvn=0xcef4de40, verify_me=0x0, do_split_ifs=4145916929) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.cpp:1574
#14 0xf7461980 in Compile::Optimize (this=0xcef4eaa8) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/compile.cpp:1597
#15 0xf7464a85 in Compile::Compile (this=0xcef4eaa8,
ci_env=0xcef4ef98, compiler=0x816dd00, target=0x8698368, osr_bci=-1,
subsume_loads=3472156161, do_escape_analysis=4155553280) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/compile.cpp:614
#16 0xf73c4f14 in C2Compiler::compile_method (this=0x816dd00,
env=<incomplete type>, target=0x8698368, entry_bci=-1) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/c2compiler.cpp:110
#17 0xf746c75e in CompileBroker::invoke_compiler_on_method
(task=0x8ea3bd8) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/compiler/compileBroker.cpp:1537
#18 0xf746ce63 in CompileBroker::compiler_thread_loop () at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/compiler/compileBroker.cpp:1389
#19 0xf78a8fbe in compiler_thread_entry (thread=0x817dc00,
__the_thread__=0x817dc00) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/runtime/thread.cpp:2711
#20 0xf78aa998 in JavaThread::thread_main_inner (this=0x817dc00) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/runtime/thread.cpp:1374
#21 0xf78aaaf3 in JavaThread::run (this=0x817dc00) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/runtime/thread.cpp:1358
#22 0xf778cb27 in java_start (thread=0x817dc00) at
/home/yamauchi/ws/jdk7/hotspot/src/os/linux/vm/os_linux.cpp:779
#23 0x4b15e3a8 in start_thread () from /lib/tls/i686/cmov/libpthread.so.0
#24 0x4afc27fe in clone () from /lib/tls/i686/cmov/libc.so.6
(gdb) f 6
#6  0xf76fa612 in PhaseIdealLoop::idom_no_update (this=0xcef4e1ec,
d=0x8b51b30) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.hpp:634
634	/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.hpp: No
such file or directory.
	in /home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/loopnode.hpp
(gdb) print *d
$9 = {_vptr.Node = 0xf7b3e9e8, _in = 0xce031bbc, _out = 0xcdf644b8,
_cnt = <error type>, _max = <error type>, _outcnt = <error type>,
_outmax = <error type>, _idx = <error type>, _class_id = 56, _flags =
0, NotAMachineReg = <optimized out or zero length>, _in_dump_cnt =
<optimized out or zero length>, _debug_idx = -2146001392, _debug_orig
= 0x0, _hash_lock = 0, _last_del = 0x81d9e30, _del_tick = <error
type>}
(gdb) print (int)d->_idx
$10 = 0
(gdb) f 9
#9  0xf7846361 in PhaseIdealLoop::handle_use (this=0xcef4e1ec,
use=0x81d9fac, def=0x81da760, cache=0xcef4d4b0, region_dom=0x8ba68b0,
new_false=0x81db03c, new_true=0x81db180, old_false=0x8ba7e40,
old_true=0x8ba7dec) at
/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/split_if.cpp:370
370	/home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/split_if.cpp: No
such file or directory.
	in /home/yamauchi/ws/jdk7/hotspot/src/share/vm/opto/split_if.cpp
(gdb) print *use_blk
$11 = {_vptr.Node = 0xf7b3e9e8, _in = 0xce031bbc, _out = 0xcdf644b8,
_cnt = <error type>, _max = <error type>, _outcnt = <error type>,
_outmax = <error type>, _idx = <error type>, _class_id = 56, _flags =
0, NotAMachineReg = <optimized out or zero length>, _in_dump_cnt =
<optimized out or zero length>, _debug_idx = -2146001392, _debug_orig
= 0x0, _hash_lock = 0, _last_del = 0x81d9e30, _del_tick = <error
type>}
(gdb) print (int)use_blk->_idx
$12 = 0




A patch that suppresses the crash (on a recent jdk7 master workspace):

diff --git a/src/share/vm/opto/loopnode.hpp b/src/share/vm/opto/loopnode.hpp
--- a/src/share/vm/opto/loopnode.hpp
+++ b/src/share/vm/opto/loopnode.hpp
@@ -853,6 +853,8 @@
  // Helpers for filtered type
   const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);

+  bool detectCrashEarly(Node* region);
+
   // Helper functions
   void register_new_node( Node *n, Node *blk );
   Node *spinup( Node *iff, Node *new_false, Node *new_true, Node
*region, Node *phi, small_cache *cache );
diff --git a/src/share/vm/opto/loopopts.cpp b/src/share/vm/opto/loopopts.cpp
--- a/src/share/vm/opto/loopopts.cpp
+++ b/src/share/vm/opto/loopopts.cpp
@@ -698,6 +698,64 @@
     : u_loop->_head->in(LoopNode::EntryControl);
 }

+// Detect a crash early
+//
+// A crash (assertion failure) happens in
+// hotspot/src/share/vm/opto/loopnode.hpp:634. The following is what
+// I think is happening.
+//
+// The node passed as the parameter to idom_no_update() has index
+// zero, meaning it's the root node of the graph. Because the root
+// does not have an immediate dominator, the _idom array has NULL for
+// index zero, which causes a null pointer dereference at the above
+// crash point.
+//
+// That is caused by the existence of a bad use node coming into the
+// if-splitting optimization. It's the very node that would later be
+// passed to idom_no_update() and cause the above crash. Specifically
+// the find_use_block() call on the bad use node (and the get_ctrl()
+// call in it) in PhaseIdealLoop::handle_use() in
+// hotspot/src/share/vm/opto/split_if.cpp:365 returns the same node as
+// the above root node, which seems wrong because find_use_block()
+// should return the control node (the basic block) that the use node
+// belongs to and the root node isn't likely to be that block.
+//
+// This function looks for the above bad use node early before the
+// if-splitting transformation occurs along with other sanity checks
+// so that we can bail out of the particular if-splitting.
+//
+// The bad use node first appears at the two levels down from the
+// region node against which the if-splitting is attempted.
+//
+// This function does what would happen right before the crash point
+// by calling get_ctrl() and checking the idom array for the bad
+// use node.
+//
+// Returns true if a bad use node is found, and false otherwise.
+//
+bool PhaseIdealLoop::detectCrashEarly(Node* region) {
+  for (DUIterator ii = region->outs(); region->has_out(ii); ii++) {
+    Node* node = region->out(ii);
+    if (node->is_Phi()) {
+      for (DUIterator jj = node->outs(); node->has_out(jj); jj++) {
+        Node* use = node->out(jj);
+        if (!use->is_CFG() && !use->is_Phi()) {
+          for (DUIterator kk = use->outs(); use->has_out(kk); kk++) {
+            Node* use2 = use->out(kk);
+            if (!use2->is_CFG() && !use2->is_Phi()) {
+              Node* use2_blk = get_ctrl(use2);
+              if (_idom[use2_blk->_idx] == NULL) {
+                return true;
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return false;  // no bad use node found; the split-if may proceed
+}
+

 //------------------------------split_if_with_blocks_post----------------------
 // Do the real work in a non-recursive function.  CFG hackery wants to be
@@ -728,6 +786,11 @@
     assert( bol->is_Bool(), "expect a bool here" );
     if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
     Node *iff = bol->unique_out();
+
+    // Bail out if a crash condition is detected
+    if (iff->in(0) != NULL && detectCrashEarly(iff->in(0))) {
+      return;
+    }

     // Check some safety conditions
     if( iff->is_If() ) {        // Classic split-if?



It'd be great if server compiler experts could suggest how to diagnose this issue.
Alternatively, if this is a known issue, please advise a better fix.

Thanks,
Hiroshi



More information about the hotspot-compiler-dev mailing list