RFR: AArch64: compiler/7196199/Test7196199.java fails on AArch64-Linux with MaxVectorSize > 8

Felix Yang felix.yang at linaro.org
Wed Sep 9 14:39:36 UTC 2015


Hi JIT developers,

    The testcase passes if MaxVectorSize < 16 or UseLoopSafepoints option
is turned off.
    After warmup, the test_incrc method failed verification once loop safe
point interrupt triggers for the outer loop.
    Watching at the JIT code, I find that C2 compiler emit superword
instruction, i.e., fadd.4s, which uses the full vector registers(128 bits).
    But currently the AArch64 safe point interrupt handler only
save/restore a half of each vector register(64 bits), which causes the
issue.
    The following patch  expands the interrupt handler to support wide
vector registers, that is, supporting POLL_AT_VECTOR_LOOP.
    It is kind of similiar to the way X86 port takes to fix the issue. The
testcase passes with the patch.
    I am doing a JTReg regression test for the patch, comments and
suggestions are welcome.
    Thanks.


PATCH:

diff -r 22f4e54b965a src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Tue Sep 01 09:36:33
2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Sep 09 20:57:02
2015 +0800
@@ -2193,18 +2193,30 @@
 }
 #endif

-void MacroAssembler::push_CPU_state() {
-    push(0x3fffffff, sp);         // integer registers except lr & sp
-
+void MacroAssembler::push_CPU_state(bool save_vectors) {
+  push(0x3fffffff, sp);         // integer registers except lr & sp
+
+  if (!save_vectors) {
     for (int i = 30; i >= 0; i -= 2)
       stpd(as_FloatRegister(i), as_FloatRegister(i+1),
    Address(pre(sp, -2 * wordSize)));
+  } else {
+    for (int i = 30; i >= 0; i -= 2)
+      stpq(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(pre(sp, -4 * wordSize)));
+  }
 }

-void MacroAssembler::pop_CPU_state() {
-  for (int i = 0; i < 32; i += 2)
-    ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
- Address(post(sp, 2 * wordSize)));
+void MacroAssembler::pop_CPU_state(bool restore_vectors) {
+  if (!restore_vectors) {
+    for (int i = 0; i < 32; i += 2)
+      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 2 * wordSize)));
+  } else {
+    for (int i = 0; i < 32; i += 2)
+      ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 4 * wordSize)));
+  }

   pop(0x3fffffff, sp);         // integer registers except lr & sp
 }
diff -r 22f4e54b965a src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Tue Sep 01 09:36:33
2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Sep 09 20:57:02
2015 +0800
@@ -768,8 +768,8 @@

   DEBUG_ONLY(void verify_heapbase(const char* msg);)

-  void push_CPU_state();
-  void pop_CPU_state() ;
+  void push_CPU_state(bool save_vectors = false);
+  void pop_CPU_state(bool restore_vectors = false) ;

   // Round up to a power of two
   void round_to(Register reg, int modulus);
diff -r 22f4e54b965a src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Tue Sep 01 09:36:33 2015
+0000
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Sep 09 20:57:02 2015
+0800
@@ -74,8 +74,8 @@
 // FIXME -- this is used by C1
 class RegisterSaver {
  public:
-  static OopMap* save_live_registers(MacroAssembler* masm, int
additional_frame_words, int* total_frame_words);
-  static void restore_live_registers(MacroAssembler* masm);
+  static OopMap* save_live_registers(MacroAssembler* masm, int
additional_frame_words, int* total_frame_words, bool save_vectors = false);
+  static void restore_live_registers(MacroAssembler* masm, bool
restore_vectors = false);

   // Offsets into the register save area
   // Used by deoptimization when it is managing result register
@@ -107,7 +107,18 @@

 };

-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int
additional_frame_words, int* total_frame_words) {
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int
additional_frame_words, int* total_frame_words, bool save_vectors) {
+#ifdef COMPILER2
+  if (save_vectors) {
+    assert(MaxVectorSize == 16, "only 128bit vectors are supported now");
+    // Save upper half of vector registers
+    int vect_words = 32 * 8 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
+
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or
words
@@ -121,7 +132,7 @@
   // Save registers, fpu state, and flags.

   __ enter();
-  __ push_CPU_state();
+  __ push_CPU_state(save_vectors);

   // Set an oopmap for the call site.  This oopmap will map all
   // oop-registers and debug-info registers as callee-saved.  This
@@ -153,8 +164,15 @@
   return oop_map;
 }

-void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
-  __ pop_CPU_state();
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool
restore_vectors) {
+#ifdef COMPILER2
+  if (restore_vectors) {
+    assert(MaxVectorSize == 16, "only 128bit vectors are supported now");
+  }
+#else
+  assert(!restore_vectors, "vectors are generated only by C2");
+#endif
+  __ pop_CPU_state(restore_vectors);
   __ leave();
 }

@@ -176,9 +194,9 @@
 }

 // Is vector's size (in bytes) bigger than a size saved by default?
-// 16 bytes XMM registers are saved by default using fxsave/fxrstor
instructions.
+// 8 bytes vector registers are saved by default on AArch64.
 bool SharedRuntime::is_wide_vector(int size) {
-  return size > 16;
+  return size > 8;
 }
 // The java_calling_convention describes stack locations as ideal slots on
 // a frame with no abi restrictions. Since we must observe abi restrictions
@@ -2785,7 +2803,7 @@
   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);

   // Save registers, fpu state, and flags
-  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words,
save_vectors);

   // The following is basically a call_VM.  However, we need the precise
   // address of the call in order to generate an oopmap. Hence, we do all
the
@@ -2836,7 +2854,7 @@
   __ bind(noException);

   // Normal exit, restore registers and exit.
-  RegisterSaver::restore_live_registers(masm);
+  RegisterSaver::restore_live_registers(masm, save_vectors);

   __ ret(lr);

diff -r 22f4e54b965a src/cpu/aarch64/vm/vm_version_aarch64.cpp
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Tue Sep 01 09:36:33 2015
+0000
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Wed Sep 09 20:57:02 2015
+0800
@@ -249,4 +249,8 @@
                                    g.generate_getPsrInfo());

   get_processor_features();
+
+#ifdef COMPILER2
+  MaxVectorSize = 16;
+#endif
 }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.openjdk.java.net/pipermail/hotspot-compiler-dev/attachments/20150909/a0663d10/attachment-0001.html>
-------------- next part --------------
diff -r 22f4e54b965a src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Sep 01 09:36:33 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Sep 09 20:57:02 2015 +0800
@@ -2193,18 +2193,30 @@
 }
 #endif
 
-void MacroAssembler::push_CPU_state() {
-    push(0x3fffffff, sp);         // integer registers except lr & sp
-
+void MacroAssembler::push_CPU_state(bool save_vectors) {
+  push(0x3fffffff, sp);         // integer registers except lr & sp
+
+  if (!save_vectors) {
     for (int i = 30; i >= 0; i -= 2)
       stpd(as_FloatRegister(i), as_FloatRegister(i+1),
 	   Address(pre(sp, -2 * wordSize)));
+  } else {
+    for (int i = 30; i >= 0; i -= 2)
+      stpq(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(pre(sp, -4 * wordSize)));
+  }
 }
 
-void MacroAssembler::pop_CPU_state() {
-  for (int i = 0; i < 32; i += 2)
-    ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
-	 Address(post(sp, 2 * wordSize)));
+void MacroAssembler::pop_CPU_state(bool restore_vectors) {
+  if (!restore_vectors) {
+    for (int i = 0; i < 32; i += 2)
+      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 2 * wordSize)));
+  } else {
+    for (int i = 0; i < 32; i += 2)
+      ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 4 * wordSize)));
+  }
 
   pop(0x3fffffff, sp);         // integer registers except lr & sp
 }
diff -r 22f4e54b965a src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Tue Sep 01 09:36:33 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Sep 09 20:57:02 2015 +0800
@@ -768,8 +768,8 @@
 
   DEBUG_ONLY(void verify_heapbase(const char* msg);)
 
-  void push_CPU_state();
-  void pop_CPU_state() ;
+  void push_CPU_state(bool save_vectors = false);
+  void pop_CPU_state(bool restore_vectors = false) ;
 
   // Round up to a power of two
   void round_to(Register reg, int modulus);
diff -r 22f4e54b965a src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Tue Sep 01 09:36:33 2015 +0000
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Sep 09 20:57:02 2015 +0800
@@ -74,8 +74,8 @@
 // FIXME -- this is used by C1
 class RegisterSaver {
  public:
-  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
-  static void restore_live_registers(MacroAssembler* masm);
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
+  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 
   // Offsets into the register save area
   // Used by deoptimization when it is managing result register
@@ -107,7 +107,18 @@
 
 };
 
-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+#ifdef COMPILER2
+  if (save_vectors) {
+    assert(MaxVectorSize == 16, "only 128bit vectors are supported now");
+    // Save upper half of vector registers
+    int vect_words = 32 * 8 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
+
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
@@ -121,7 +132,7 @@
   // Save registers, fpu state, and flags.
 
   __ enter();
-  __ push_CPU_state();
+  __ push_CPU_state(save_vectors);
 
   // Set an oopmap for the call site.  This oopmap will map all
   // oop-registers and debug-info registers as callee-saved.  This
@@ -153,8 +164,15 @@
   return oop_map;
 }
 
-void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
-  __ pop_CPU_state();
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
+#ifdef COMPILER2
+  if (restore_vectors) {
+    assert(MaxVectorSize == 16, "only 128bit vectors are supported now");
+  }
+#else
+  assert(!restore_vectors, "vectors are generated only by C2");
+#endif
+  __ pop_CPU_state(restore_vectors);
   __ leave();
 }
 
@@ -176,9 +194,9 @@
 }
 
 // Is vector's size (in bytes) bigger than a size saved by default?
-// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
+// 8 bytes vector registers are saved by default on AArch64.
 bool SharedRuntime::is_wide_vector(int size) {
-  return size > 16;
+  return size > 8;
 }
 // The java_calling_convention describes stack locations as ideal slots on
 // a frame with no abi restrictions. Since we must observe abi restrictions
@@ -2785,7 +2803,7 @@
   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
 
   // Save registers, fpu state, and flags
-  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
 
   // The following is basically a call_VM.  However, we need the precise
   // address of the call in order to generate an oopmap. Hence, we do all the
@@ -2836,7 +2854,7 @@
   __ bind(noException);
 
   // Normal exit, restore registers and exit.
-  RegisterSaver::restore_live_registers(masm);
+  RegisterSaver::restore_live_registers(masm, save_vectors);
 
   __ ret(lr);
 
diff -r 22f4e54b965a src/cpu/aarch64/vm/vm_version_aarch64.cpp
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Tue Sep 01 09:36:33 2015 +0000
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Wed Sep 09 20:57:02 2015 +0800
@@ -249,4 +249,8 @@
                                    g.generate_getPsrInfo());
 
   get_processor_features();
+
+#ifdef COMPILER2
+  MaxVectorSize = 16;
+#endif
 }


More information about the hotspot-compiler-dev mailing list