[aarch64-port-dev ] JDK9: Merge from upstream

Andrew Haley aph at redhat.com
Thu Nov 6 17:23:41 UTC 2014


Another merge.  This is to be the basis for the merge from
aarch64-port into jdk9/dev.

There's nothing very exciting on the AArch64 side, but I've included
the cpu-specific diffs for your information.

Andrew.


diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/c1_globals_aarch64.hpp
--- a/src/cpu/aarch64/vm/c1_globals_aarch64.hpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/c1_globals_aarch64.hpp	Thu Nov 06 08:41:57 2014 -0500
@@ -57,6 +57,9 @@
 define_pd_global(intx, NewSizeThreadIncrease,        4*K  );
 define_pd_global(intx, InitialCodeCacheSize,         160*K);
 define_pd_global(intx, ReservedCodeCacheSize,        32*M );
+define_pd_global(intx, NonProfiledCodeHeapSize,      13*M );
+define_pd_global(intx, ProfiledCodeHeapSize,         14*M );
+define_pd_global(intx, NonNMethodCodeHeapSize,       5*M  );
 define_pd_global(bool, ProfileInterpreter,           false);
 define_pd_global(intx, CodeCacheExpansionSize,       32*K );
 define_pd_global(uintx, CodeCacheMinBlockLength,     1);
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/c2_globals_aarch64.hpp
--- a/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Thu Nov 06 08:41:57 2014 -0500
@@ -75,6 +75,9 @@
 define_pd_global(bool, OptoBundling,                 false);

 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
+define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
+define_pd_global(intx, ProfiledCodeHeapSize,         22*M);
+define_pd_global(intx, NonNMethodCodeHeapSize,       5*M );
 define_pd_global(uintx, CodeCacheMinBlockLength,     4);
 define_pd_global(uintx, CodeCacheMinimumUseSpace,    400*K);

diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/frame_aarch64.cpp
--- a/src/cpu/aarch64/vm/frame_aarch64.cpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.cpp	Thu Nov 06 08:41:57 2014 -0500
@@ -823,3 +823,10 @@
   Method* m = (Method*)p[frame::interpreter_frame_method_offset];
   printbc(m, bcx);
 }
+
+#ifndef PRODUCT
+// This is a generic constructor which is only used by pns() in debug.cpp.
+frame::frame(void* sp, void* fp, void* pc) {
+  init((intptr_t*)sp, (intptr_t*)fp, (address)pc);
+}
+#endif
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/frame_aarch64.hpp
--- a/src/cpu/aarch64/vm/frame_aarch64.hpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.hpp	Thu Nov 06 08:41:57 2014 -0500
@@ -188,6 +188,8 @@

   frame(intptr_t* sp, intptr_t* fp);

+  void init(intptr_t* sp, intptr_t* fp, address pc);
+
   // accessors for the instance variables
   // Note: not necessarily the real 'frame pointer' (see real_fp)
   intptr_t*   fp() const { return _fp; }
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/frame_aarch64.inline.hpp
--- a/src/cpu/aarch64/vm/frame_aarch64.inline.hpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.inline.hpp	Thu Nov 06 08:41:57 2014 -0500
@@ -45,13 +45,15 @@

 static int spin;

-inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) {
   intptr_t a = intptr_t(sp);
   intptr_t b = intptr_t(fp);
+#ifndef PRODUCT
   if (fp)
     if (sp > fp || (fp - sp > 0x100000))
       for(;;)
 	asm("nop");
+#endif
   _sp = sp;
   _unextended_sp = sp;
   _fp = fp;
@@ -69,6 +71,10 @@
   }
 }

+inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+  init(sp, fp, pc);
+}
+
 inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
   intptr_t a = intptr_t(sp);
   intptr_t b = intptr_t(fp);
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp
--- a/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp	Thu Nov 06 08:41:57 2014 -0500
@@ -42,9 +42,10 @@
   address generate_native_entry(bool synchronized);
   address generate_abstract_entry(void);
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
-void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs);
-  address generate_empty_entry(void);
-  address generate_accessor_entry(void);
+  address generate_jump_to_normal_entry(void);
+  address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
+  address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
+  void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs);
   address generate_Reference_get_entry();
   address generate_CRC32_update_entry();
   address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/interpreter_aarch64.cpp
--- a/src/cpu/aarch64/vm/interpreter_aarch64.cpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/interpreter_aarch64.cpp	Thu Nov 06 08:41:57 2014 -0500
@@ -237,6 +237,17 @@
   __ blrt(rscratch1, gpargs, fpargs, rtype);
 }

+// Jump into the normal path for the accessor and empty entries so they use the normal entry.
+// The "fast" optimization doesn't update the invocation count, which can prevent inlining
+// of these methods even though they should be inlined.
+address InterpreterGenerator::generate_jump_to_normal_entry(void) {
+  address entry_point = __ pc();
+
+  assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
+  __ b(Interpreter::entry_for_kind(Interpreter::zerolocals));
+  return entry_point;
+}
+
 // Abstract method entry
 // Attempt to execute abstract method. Throw exception
 address InterpreterGenerator::generate_abstract_entry(void) {
@@ -262,42 +273,6 @@
 }


-// Empty method, generate a very fast return.
-
-address InterpreterGenerator::generate_empty_entry(void) {
-  // rmethod: Method*
-  // r13: sender sp must set sp to this value on return
-
-  if (!UseFastEmptyMethods) {
-    return NULL;
-  }
-
-  address entry_point = __ pc();
-
-  // If we need a safepoint check, generate full interpreter entry.
-  Label slow_path;
-  {
-    unsigned long offset;
-    assert(SafepointSynchronize::_not_synchronized == 0,
-	   "SafepointSynchronize::_not_synchronized");
-    __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset);
-    __ ldrw(rscratch2, Address(rscratch2, offset));
-    __ cbnz(rscratch2, slow_path);
-  }
-
-  // do nothing for empty methods (do not even increment invocation counter)
-  // Code: _return
-  // _return
-  // return w/o popping parameters
-  __ mov(sp, r13); // Restore caller's SP
-  __ br(lr);
-
-  __ bind(slow_path);
-  (void) generate_normal_entry(false);
-  return entry_point;
-
-}
-
 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {

   // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Thu Nov 06 08:41:57 2014 -0500
@@ -660,12 +660,6 @@
 //
 //

-// Call an accessor method (assuming it is resolved, otherwise drop
-// into vanilla (slow path) entry
-address InterpreterGenerator::generate_accessor_entry(void) {
-  return NULL;
-}
-
 // Method entry for java.lang.ref.Reference.get.
 address InterpreterGenerator::generate_Reference_get_entry(void) {
   return NULL;
@@ -1411,100 +1405,6 @@
   return entry_point;
 }

-// Entry points
-//
-// Here we generate the various kind of entries into the interpreter.
-// The two main entry type are generic bytecode methods and native
-// call method.  These both come in synchronized and non-synchronized
-// versions but the frame layout they create is very similar. The
-// other method entry types are really just special purpose entries
-// that are really entry and interpretation all in one. These are for
-// trivial methods like accessor, empty, or special math methods.
-//
-// When control flow reaches any of the entry types for the interpreter
-// the following holds ->
-//
-// Arguments:
-//
-// rmethod: Method*
-//
-// Stack layout immediately at entry
-//
-// [ return address     ] <--- rsp
-// [ parameter n        ]
-//   ...
-// [ parameter 1        ]
-// [ expression stack   ] (caller's java expression stack)
-
-// Assuming that we don't go to one of the trivial specialized entries
-// the stack will look like below when we are ready to execute the
-// first bytecode (or call the native routine). The register usage
-// will be as the template based interpreter expects (see
-// interpreter_aarch64.hpp).
-//
-// local variables follow incoming parameters immediately; i.e.
-// the return address is moved to the end of the locals).
-//
-// [ monitor entry      ] <--- esp
-//   ...
-// [ monitor entry      ]
-// [ expr. stack bottom ]
-// [ saved rbcp         ]
-// [ current rlocals    ]
-// [ Method*            ]
-// [ saved rfp          ] <--- rfp
-// [ return address     ]
-// [ local variable m   ]
-//   ...
-// [ local variable 1   ]
-// [ parameter n        ]
-//   ...
-// [ parameter 1        ] <--- rlocals
-
-address AbstractInterpreterGenerator::generate_method_entry(
-                                        AbstractInterpreter::MethodKind kind) {
-  // determine code generation flags
-  bool synchronized = false;
-  address entry_point = NULL;
-
-  switch (kind) {
-  case Interpreter::zerolocals             :                                                                             break;
-  case Interpreter::zerolocals_synchronized: synchronized = true;                                                        break;
-  case Interpreter::native                 : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(false); break;
-  case Interpreter::native_synchronized    : entry_point = ((InterpreterGenerator*) this)->generate_native_entry(true);  break;
-  case Interpreter::empty                  : entry_point = ((InterpreterGenerator*) this)->generate_empty_entry();       break;
-  case Interpreter::accessor               : entry_point = ((InterpreterGenerator*) this)->generate_accessor_entry();    break;
-  case Interpreter::abstract               : entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry();    break;
-
-  case Interpreter::java_lang_math_sin     : // fall thru
-  case Interpreter::java_lang_math_cos     : // fall thru
-  case Interpreter::java_lang_math_tan     : // fall thru
-  case Interpreter::java_lang_math_abs     : // fall thru
-  case Interpreter::java_lang_math_log     : // fall thru
-  case Interpreter::java_lang_math_log10   : // fall thru
-  case Interpreter::java_lang_math_sqrt    : // fall thru
-  case Interpreter::java_lang_math_pow     : // fall thru
-  case Interpreter::java_lang_math_exp     : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);    break;
-  case Interpreter::java_lang_ref_reference_get
-                                           : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
-  case Interpreter::java_util_zip_CRC32_update
-                                           : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_update_entry();  break;
-  case Interpreter::java_util_zip_CRC32_updateBytes
-                                           : // fall thru
-  case Interpreter::java_util_zip_CRC32_updateByteBuffer
-                                           : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break;
-  default                                  : ShouldNotReachHere();                                                       break;
-  }
-
-  if (entry_point) {
-    return entry_point;
-  }
-
-  return ((InterpreterGenerator*) this)->
-                                generate_normal_entry(synchronized);
-}
-
-
 // These should never be compiled since the interpreter will prefer
 // the compiled version to the intrinsic version.
 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
@@ -1549,7 +1449,7 @@
                                          int callee_locals,
                                          bool is_top_frame) {
   // Note: This calculation must exactly parallel the frame setup
-  // in AbstractInterpreterGenerator::generate_method_entry.
+  // in InterpreterGenerator::generate_method_entry.

   // fixed size of an interpreter frame:
   int overhead = frame::sender_sp_offset -
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Thu Nov 06 08:41:57 2014 -0500
@@ -1761,11 +1761,10 @@
       // r2: scratch
       __ cbz(r0, dispatch);	// test result -- no osr if null
       // nmethod may have been invalidated (VM may block upon call_VM return)
-      __ ldrw(r2, Address(r0, nmethod::entry_bci_offset()));
-      // InvalidOSREntryBci == -2 which overflows cmpw as unsigned
-      // use cmnw against -InvalidOSREntryBci which does the same thing
-      __ cmn(r2, -InvalidOSREntryBci);
-      __ br(Assembler::EQ, dispatch);
+      __ ldrb(r2, Address(r0, nmethod::state_offset()));
+      if (nmethod::in_use != 0)
+	__ sub(r2, r2, nmethod::in_use);
+      __ cbnz(r2, dispatch);

       // We have the address of an on stack replacement routine in r0
       // We need to prepare to execute the OSR method. First we must
diff -r 32b3b34c04d1 -r 4bdc2d6606db src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp
--- a/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp	Mon Nov 03 14:48:15 2014 +0000
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp	Thu Nov 06 08:41:57 2014 -0500
@@ -690,6 +690,11 @@
 }
 #endif

+int os::extra_bang_size_in_bytes() {
+  // AArch64 does not require the additional stack bang.
+  return 0;
+}
+
 extern "C" {
   int SpinPause() {
   }


More information about the aarch64-port-dev mailing list