[aarch64-port-dev ] RFR: Add support for G1GC

Edward Nevill edward.nevill at linaro.org
Tue Mar 18 10:52:25 UTC 2014


Hi,

The following patch adds support for G1GC.

This is disabled by default and only enabled with the -XX:+UseG1GC option.

I have tested this against the JTreg hotspot test suite and it introduces no additional failures or errors.

OK to push?
Ed.

--- CUT HERE ---
exporting patch:
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1395139429 0
#      Tue Mar 18 10:43:49 2014 +0000
# Node ID 53205a277e07e8be32c4592ba0982f7bc3817717
# Parent  939480aaf1b23f1013de7bca05dd6a2c3cef3430
Add support for G1GC

diff -r 939480aaf1b2 -r 53205a277e07 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Tue Mar 11 15:44:21 2014 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Tue Mar 18 10:43:49 2014 +0000
@@ -5112,6 +5112,19 @@
 
 // Store Instructions
 
+// Store CMS card-mark Immediate
+instruct storeimmCM0(immI0 zero, memory mem)
+%{
+  match(Set mem (StoreCM mem zero));
+
+  ins_cost(MEMORY_REF_COST);
+  format %{ "strb zr, $mem\t# byte" %}
+
+  ins_encode(aarch64_enc_strb0(mem));
+
+  ins_pipe(pipe_class_memory);
+%}
+
 // Store Byte
 instruct storeB(iRegI src, memory mem)
 %{
@@ -5126,6 +5139,7 @@
   ins_pipe(pipe_class_memory);
 %}
 
+
 instruct storeimmB0(immI0 zero, memory mem)
 %{
   match(Set mem (StoreB mem zero));
diff -r 939480aaf1b2 -r 53205a277e07 src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Mar 11 15:44:21 2014 +0000
+++ b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp	Tue Mar 18 10:43:49 2014 +0000
@@ -542,14 +542,46 @@
 /////////////////////////////////////////////////////////////////////////////
 #if INCLUDE_ALL_GCS
 
-void G1PreBarrierStub::emit_code(LIR_Assembler* ce) { Unimplemented(); }
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
+  __ bind(_entry);
+  assert(pre_val()->is_register(), "Precondition.");
+
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+  __ cbz(pre_val_reg, _continuation);
+  ce->store_parameter(pre_val()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ b(_continuation);
+}
 
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
-jbyte* G1PostBarrierStub::byte_map_base_slow() { Unimplemented(); return 0; }
+jbyte* G1PostBarrierStub::byte_map_base_slow() {
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  assert(bs->is_a(BarrierSet::G1SATBCTLogging),
+         "Must be if we're using this.");
+  return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
+}
 
 
-void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { Unimplemented(); }
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register new_val_reg = new_val()->as_register();
+  __ cbz(new_val_reg, _continuation);
+  ce->store_parameter(addr()->as_pointer_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
+  __ b(_continuation);
+}
 
 #endif // INCLUDE_ALL_GCS
 /////////////////////////////////////////////////////////////////////////////
diff -r 939480aaf1b2 -r 53205a277e07 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Mar 11 15:44:21 2014 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Mar 18 10:43:49 2014 +0000
@@ -1866,47 +1866,47 @@
 void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) {
   
   assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register");
-  if (left->is_single_cpu()) {
-    assert (right->is_single_cpu() || right->is_constant(), "single register or constant expected");
-    if (right->is_constant()
-	&& Assembler::operand_valid_for_logical_immediate(true, right->as_jint())) {
-
-      switch (code) {
-      case lir_logic_and: __ andw (dst->as_register(), left->as_register(), right->as_jint()); break;
-      case lir_logic_or:  __ orrw (dst->as_register(), left->as_register(), right->as_jint()); break;
-      case lir_logic_xor: __ eorw (dst->as_register(), left->as_register(), right->as_jint()); break;
-      default: ShouldNotReachHere(); break;
-      }
-    } else {
-      switch (code) {
-      case lir_logic_and: __ andw (dst->as_register(), left->as_register(), right->as_register()); break;
-      case lir_logic_or:  __ orrw (dst->as_register(), left->as_register(), right->as_register()); break;
-      case lir_logic_xor: __ eorw (dst->as_register(), left->as_register(), right->as_register()); break;
-      default: ShouldNotReachHere(); break;
-      }
-    }
-  } else {
-    assert (right->is_double_cpu() || right->is_constant(), "single register or constant expected");
-    if (right->is_double_cpu()) {
-      switch (code) {
-      case lir_logic_and: __ andr(dst->as_register_lo(), left->as_register_lo(), right->as_register_lo()); break;
-      case lir_logic_or:  __ orr (dst->as_register_lo(), left->as_register_lo(), right->as_register_lo()); break;
-      case lir_logic_xor: __ eor (dst->as_register_lo(), left->as_register_lo(), right->as_register_lo()); break;
-      default:
-	ShouldNotReachHere();
-	break;
-      }
-    } else {
-      switch (code) {
-      case lir_logic_and: __ andr(dst->as_register_lo(), left->as_register_lo(), right->as_jlong()); break;
-      case lir_logic_or:  __ orr (dst->as_register_lo(), left->as_register_lo(), right->as_jlong()); break;
-      case lir_logic_xor: __ eor (dst->as_register_lo(), left->as_register_lo(), right->as_jlong()); break;
-      default:
-	ShouldNotReachHere();
-	break;
-      }
-    }
-  }
+  Register Rleft = left->is_single_cpu() ? left->as_register() :
+                                           left->as_register_lo();
+   if (dst->is_single_cpu()) {
+     Register Rdst = dst->as_register();
+     if (right->is_constant()) {
+       switch (code) {
+         case lir_logic_and: __ andw (Rdst, Rleft, right->as_jint()); break;
+         case lir_logic_or:  __ orrw (Rdst, Rleft, right->as_jint()); break;
+         case lir_logic_xor: __ eorw (Rdst, Rleft, right->as_jint()); break;
+         default: ShouldNotReachHere(); break;
+       }
+     } else {
+       Register Rright = right->is_single_cpu() ? right->as_register() :
+                                                  right->as_register_lo();
+       switch (code) {
+         case lir_logic_and: __ andw (Rdst, Rleft, Rright); break;
+         case lir_logic_or:  __ orrw (Rdst, Rleft, Rright); break;
+         case lir_logic_xor: __ eorw (Rdst, Rleft, Rright); break;
+         default: ShouldNotReachHere(); break;
+       }
+     }
+   } else {
+     Register Rdst = dst->as_register_lo();
+     if (right->is_constant()) {
+       switch (code) {
+         case lir_logic_and: __ andr (Rdst, Rleft, right->as_jlong()); break;
+         case lir_logic_or:  __ orr (Rdst, Rleft, right->as_jlong()); break;
+         case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jlong()); break;
+         default: ShouldNotReachHere(); break;
+       }
+     } else {
+       Register Rright = right->is_single_cpu() ? right->as_register() :
+                                                  right->as_register_lo();
+       switch (code) {
+         case lir_logic_and: __ andr (Rdst, Rleft, Rright); break;
+         case lir_logic_or:  __ orr (Rdst, Rleft, Rright); break;
+         case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break;
+         default: ShouldNotReachHere(); break;
+       }
+     }
+   }
 }
 
 
diff -r 939480aaf1b2 -r 53205a277e07 src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Mar 11 15:44:21 2014 +0000
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Tue Mar 18 10:43:49 2014 +0000
@@ -42,6 +42,9 @@
 #include "runtime/vframe.hpp"
 #include "runtime/vframeArray.hpp"
 #include "vmreg_aarch64.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
 
 
 // Implementation of StubAssembler
@@ -1148,6 +1151,133 @@
       }
       break;
 
+#if INCLUDE_ALL_GCS
+    case g1_pre_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
+        // arg0 : previous value of memory
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        if (bs->kind() != BarrierSet::G1SATBCTLogging) {
+	  __ mov(r0, (int)id);
+	  __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), r0);
+	  __ should_not_reach_here();
+          break;
+        }
+
+        const Register pre_val = r0;
+        const Register thread = rthread;
+        const Register tmp = rscratch1;
+
+        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_active()));
+
+        Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        Label done;
+        Label runtime;
+
+	//__ push(r0->bit(1) | r1->bit(1), sp);
+	__ push(r0->bit(1) | r1->bit(1) | rscratch1->bit(1) | rscratch2->bit(1), sp);
+        // Can we store original value in the thread's buffer?
+        f.load_argument(0, pre_val);
+        __ ldr(tmp, queue_index);
+        __ cbz(tmp, runtime);
+
+        __ sub(tmp, tmp, wordSize);
+        __ str(tmp, queue_index);
+        __ ldr(rscratch2, buffer);
+        __ add(tmp, tmp, rscratch2);
+        __ str(pre_val, Address(tmp, 0));
+        __ b(done);
+
+        __ bind(runtime);
+        __ push(0xfc, sp);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+	__ pop(0xfc, sp);
+        __ bind(done);
+	//__ pop(r0->bit(1) | r1->bit(1), sp);
+	__ pop(r0->bit(1) | r1->bit(1) | rscratch1->bit(1) | rscratch2->bit(1), sp);
+      }
+      break;
+    case g1_post_barrier_slow_id:
+      {
+        StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
+
+        // arg0: store_address
+        Address store_addr(rfp, 2*BytesPerWord);
+
+        BarrierSet* bs = Universe::heap()->barrier_set();
+        CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+        assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+        Label done;
+        Label runtime;
+
+        // At this point we know new_value is non-NULL and the new_value crosses regions.
+        // Must check to see if card is already dirty
+
+        const Register thread = rthread;
+
+        Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                             PtrQueue::byte_offset_of_index()));
+        Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                        PtrQueue::byte_offset_of_buf()));
+
+        const Register card_addr = rscratch2;
+
+	//__ push(r0->bit(1) | r1->bit(1), sp);
+	__ push(r0->bit(1) | r1->bit(1) | rscratch1->bit(1) | rscratch2->bit(1), sp);
+        f.load_argument(0, card_addr);
+        __ lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
+        // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT
+        // a valid address and therefore is not properly handled by the relocation code.
+	__ mov(rscratch1, (intptr_t)ct->byte_map_base);
+        __ add(card_addr, card_addr, rscratch1);
+        __ ldrb(rscratch1, Address(card_addr, 0));
+        __ cmpw(rscratch1, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+	__ br(Assembler::EQ, done);
+
+        __ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+        __ ldrb(rscratch1, Address(card_addr, 0));
+        __ cmpw(rscratch1, (int)CardTableModRefBS::dirty_card_val());
+	__ br(Assembler::EQ, done);
+
+        // storing region crossing non-NULL, card is clean.
+        // dirty card and log.
+
+        __ mov(rscratch1, (int)CardTableModRefBS::dirty_card_val());
+        __ strb(rscratch1, Address(card_addr, 0));
+
+        __ ldr(rscratch1, queue_index);
+        __ cbz(rscratch1, runtime);
+        __ sub(rscratch1, rscratch1, wordSize);
+        __ str(rscratch1, queue_index);
+
+        const Register buffer_addr = rscratch2;
+
+	__ push(card_addr->bit(1), sp);
+	__ ldr(buffer_addr, buffer);
+	__ add(rscratch1, buffer_addr, rscratch1);
+	__ pop(card_addr->bit(1), sp);
+	__ str(card_addr, Address(rscratch1, 0));
+	__ b(done);
+
+        __ bind(runtime);
+	__ push(0xfc, sp);
+        __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+	__ pop(0xfc, sp);
+        __ bind(done);
+	//__ pop(r0->bit(1) | r1->bit(1), sp);
+	__ pop(r0->bit(1) | r1->bit(1) | rscratch1->bit(1) | rscratch2->bit(1), sp);
+
+      }
+      break;
+#endif
+
     case predicate_failed_trap_id:
       {
         StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments);
diff -r 939480aaf1b2 -r 53205a277e07 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Mar 11 15:44:21 2014 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Tue Mar 18 10:43:49 2014 +0000
@@ -47,11 +47,12 @@
 // #include "runtime/os.hpp"
 // #include "runtime/sharedRuntime.hpp"
 // #include "runtime/stubRoutines.hpp"
-// #if INCLUDE_ALL_GCS
-// #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
-// #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
-// #include "gc_implementation/g1/heapRegion.hpp"
-// #endif
+
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -2409,13 +2410,174 @@
                                           Register thread,
                                           Register tmp,
                                           bool tosca_live,
-                                          bool expand_call) { Unimplemented(); }
+                                          bool expand_call) {
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+  assert(thread == rthread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg)
+    assert_different_registers(obj, pre_val, tmp);
+
+  Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+  Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+
+  // Is marking active?
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    ldrw(tmp, in_progress);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    ldrb(tmp, in_progress);
+  }
+  cbzw(tmp, done);
+
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  cbz(pre_val, done);
+
+  // Can we store original value in the thread's buffer?
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  ldr(tmp, index);                      // tmp := *index_adr
+  cbz(tmp, runtime);                    // tmp == 0?
+                                        // If yes, goto runtime
+
+  sub(tmp, tmp, wordSize);              // tmp := tmp - wordSize
+  str(tmp, index);                      // *index_adr := tmp
+  ldr(rscratch1, buffer);
+  add(tmp, tmp, rscratch1);             // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  str(pre_val, Address(tmp, 0));
+  b(done);
+
+  bind(runtime);
+  // save the live input values
+  push(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssembler::call_VM_leaf_base)
+  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we are generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+    pass_arg1(this, thread);
+    pass_arg0(this, pre_val);
+    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  pop(r0->bit(tosca_live) | obj->bit(obj != noreg) | pre_val->bit(true), sp);
+
+  bind(done);
+}
 
 void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                            Register new_val,
                                            Register thread,
                                            Register tmp,
-                                           Register tmp2) { Unimplemented(); }
+                                           Register tmp2) {
+#ifdef _LP64
+  assert(thread == rthread, "must be");
+#endif // _LP64
+
+  Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_index()));
+  Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
+                                       PtrQueue::byte_offset_of_buf()));
+
+  BarrierSet* bs = Universe::heap()->barrier_set();
+  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
+  Label done;
+  Label runtime;
+
+  // Does store cross heap regions?
+
+  mov(tmp, store_addr);
+  eor(tmp, tmp, new_val);
+  lsr(tmp, tmp, HeapRegion::LogOfHRGrainBytes);
+  cbz(tmp, done);
+
+  // crosses regions, storing NULL?
+
+  cbz(new_val, done);
+
+  // storing region crossing non-NULL, is card already dirty?
+
+  ExternalAddress cardtable((address) ct->byte_map_base);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+  const Register card_addr = tmp;
+
+  mov(card_addr, store_addr);
+  lsr(card_addr, card_addr, CardTableModRefBS::card_shift);
+
+  unsigned long offset;
+  adrp(tmp2, cardtable, offset);
+
+  // get the address of the card
+  add(card_addr, card_addr, tmp2);
+  ldrb(tmp2, Address(card_addr, offset));
+  cmpw(tmp2, (int)G1SATBCardTableModRefBS::g1_young_card_val());
+  br(Assembler::EQ, done);
+
+  membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
+  ldrb(tmp2, Address(card_addr, offset));
+  cmpw(tmp2, (int)CardTableModRefBS::dirty_card_val());
+  br(Assembler::EQ, done);
+
+  // storing a region crossing, non-NULL oop, card is clean.
+  // dirty card and log.
+
+  mov(tmp2, (int)CardTableModRefBS::dirty_card_val());
+  strb(tmp2, Address(card_addr, offset));
+
+  ldr(rscratch1, queue_index);
+  cbz(rscratch1, runtime);
+  sub(rscratch1, rscratch1, wordSize);
+  str(rscratch1, queue_index);
+
+  ldr(tmp2, buffer);
+  add(tmp2, tmp2, rscratch1);
+  str(card_addr, Address(tmp2, 0));
+  b(done);
+
+  bind(runtime);
+  // save the live input values
+  push(store_addr->bit(true) | new_val->bit(true), sp);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
+  pop(store_addr->bit(true) | new_val->bit(true), sp);
+
+  bind(done);
+}
 
 #endif // INCLUDE_ALL_GCS

--- CUT HERE ---




More information about the aarch64-port-dev mailing list