ARM: Reduce size of safepoint code, etc.

Andrew Haley aph at redhat.com
Tue May 29 08:31:43 PDT 2012


A few miscellaneous improvements.  These lead to reduced code size and
less memory traffic in the normal (i.e. non-safepoint) case.

The result is like this:

    0 : 10 24          bipush
0x4085ec58:	movs	r3, #36	; 0x24
    2 : ac             ireturn
0x4085ec5a:	movw	r0, #0
0x4085ec5e:	movt	r0, #16670	; 0x411e
0x4085ec62:	ldr	r0, [r0, #8]
0x4085ec64:	b.n	0x4085ec78
0x4085ec66:			; <UNDEFINED> instruction: 0xdead
0x4085ec68:	str.w	r3, [r4, #-4]!
0x4085ec6c:	movs	r1, #50	; 0x32
0x4085ec6e:	adds	r2, r4, #4
0x4085ec70:	bl	0x4072f782
0x4085ec74:	ldr.w	r3, [r4], #4
0x4085ec78:

Note that r3 is only pushed to memory if we take a safepoint.

Also, the code to tear down the stack frame and place the return
value is only generated once, and all the returns jump to it.


2012-05-29  Andrew Haley  <aph at redhat.com>

	* thumb2.cpp (SAVE_STACK, RESTORE_STACK): New.
	(Thumb2_Pop_Multiple): Allow nregs == 0.
	(Thumb2_Safepoint): Move the Thumb2_Flush so that registers are
	only flushed if we actually take a safepoint.
	Don't restore locals at a return.
	Move the stack pointer adjustment out of line into the helper.
	Combine multiple returns by branching to a common point rather
	than generating code every time.

Andrew.


diff -r 5b6a9a63a280 src/cpu/zero/vm/thumb2.cpp
--- a/src/cpu/zero/vm/thumb2.cpp	Mon May 28 08:48:42 2012 -0400
+++ b/src/cpu/zero/vm/thumb2.cpp	Tue May 29 11:20:29 2012 -0400
@@ -3168,6 +3168,8 @@
   unsigned i;
   Reg r;

+  if (nregs == 0)
+    return;
   JASSERT(nregs > 0, "nregs must be > 0");
   if (nregs == 1) {
     ldr_imm(codebuf, regs[0], Rstack, 4, 0, 1);
@@ -3445,6 +3447,21 @@
   jstack->depth = 0;
 }

+// SAVE_STACK snapshots the Java stack's register assignment into locals
+// (saved_stack_elements, saved_stack_depth) of the enclosing scope so that,
+// after a flush to memory, RESTORE_STACK can reload the same registers.
+#define SAVE_STACK(JSTACK)					\
+  unsigned saved_stack_elements[JSTACK->depth];			\
+  unsigned saved_stack_depth;					\
+  memcpy(saved_stack_elements, JSTACK->stack,			\
+	 JSTACK->depth * sizeof saved_stack_elements[0]);	\
+  saved_stack_depth = JSTACK->depth;
+#define RESTORE_STACK(JSTACK, CODEBUF)					\
+  Thumb2_Pop_Multiple(CODEBUF, saved_stack_elements, saved_stack_depth); \
+  memcpy(JSTACK->stack, saved_stack_elements,				\
+	 saved_stack_depth * sizeof saved_stack_elements[0]);		\
+  JSTACK->depth = saved_stack_depth;
+
 // Call this when we are about to corrupt a local
 // The local may already be on the stack
 // For example
@@ -4164,7 +4181,6 @@

 void Thumb2_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci, int offset)
 {
-  Thumb2_Flush(jinfo);
   // normal case: read the polling page and branch to skip
   // the safepoint test
   // abnormal case: read the polling page, trap to handler
@@ -4203,6 +4219,8 @@
   //
   //  n.b. for a return there is no need save or restore locals

+  bool is_return = offset == 0; // This is some kind of return bytecode
+
   int r_tmp = Thumb2_Tmp(jinfo, 0);
   unsigned dest;
   if (offset < 0) {
@@ -4239,27 +4257,32 @@
   // now write a magic word after the branch so the signal handler can
   // test that a polling page read is kosher
   out_16(jinfo->codebuf, THUMB2_POLLING_PAGE_MAGIC);
+
+  {
+    // Flush the stack to memory and save its register state.
+    SAVE_STACK(jinfo->jstack);
+    Thumb2_Flush(jinfo);
+
   // now the safepoint polling code itself
-  // n.b. no need for save or restore of locals at return i.e. when offset == 0
-  //if (offset != 0) {
+
+    // We save the locals at a return bytecode even though we aren't
+    // going to restore them: we do so because otherwise the GC might
+    // scan garbage.  It might be better to save a bunch of null
+    // references.
     Thumb2_save_locals(jinfo, stackdepth);
-    //}
-
-  // The frame walking code used by the garbage collector
-  // (frame::interpreter_frame_tos_address()) assumes that the stack
-  // pointer points one word below the top item on the stack, so we
-  // have to adjust the SP saved in istate accordingly.  If we don't,
-  // the value on TOS won't be seen by the GC and we will crash later.
-  sub_imm(jinfo->codebuf, ARM_R0, Rstack, 4);
-  store_istate(jinfo, ARM_R0, ISTATE_STACK, stackdepth);

   mov_imm(jinfo->codebuf, ARM_R1, bci+CONSTMETHOD_CODEOFFSET);
   add_imm(jinfo->codebuf, ARM_R2, ISTATE_REG(jinfo),
 	  ISTATE_OFFSET(jinfo, stackdepth, 0));
   bl(jinfo->codebuf, handlers[H_SAFEPOINT]);
-  //if (offset != 0) {
+
+    // We don't restore locals if we're returning.
+    if (! is_return)
     Thumb2_restore_locals(jinfo, stackdepth);
-    //}
+
+    // But we always restore the register state of the stack.
+    RESTORE_STACK(jinfo->jstack, jinfo->codebuf);
+
   if (offset < 0) {
     // needs another unconditional backward branch
     branch_uncond(jinfo->codebuf, dest);
@@ -4268,6 +4291,7 @@
   branch_narrow_patch(jinfo->codebuf, read_loc + 2);
   }
 }
+}

 // If this is a backward branch, compile a safepoint check
 void Thumb2_Cond_Safepoint(Thumb2_Info *jinfo, int stackdepth, int bci) {
@@ -4467,7 +4491,7 @@
       branch_uncond(jinfo->codebuf, ret_idx);
       return;
     }
-    if (OSPACE) jinfo->compiled_return = jinfo->codebuf->idx * 2;
+    jinfo->compiled_return = jinfo->codebuf->idx * 2;
   } else {
     if (opcode == opc_lreturn || opcode == opc_dreturn) {
       Thumb2_Fill(jinfo, 2);
@@ -4482,7 +4506,7 @@
         branch_uncond(jinfo->codebuf, ret_idx);
         return;
       }
-      if (OSPACE) jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
+      jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
     }
   }

@@ -7847,6 +7871,14 @@
   handlers[H_SAFEPOINT] = out_pos(&codebuf);
   stm(&codebuf, (1<<ARM_LR), ARM_SP, PUSH_FD, 1);

+  // The frame walking code used by the garbage collector
+  // (frame::interpreter_frame_tos_address()) assumes that the stack
+  // pointer points one word below the top item on the stack, so we
+  // have to adjust the SP saved in istate accordingly.  If we don't,
+  // the value on TOS won't be seen by the GC and we will crash later.
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK, 1, 0);
+
   // Set up BytecodeInterpreter->_bcp for the GC
   // bci+CONSTMETHOD_CODEOFFSET is passed in ARM_R1
   // istate is passed in ARM_R2



More information about the distro-pkg-dev mailing list