[aarch64-port-dev] Optimize handling of compressed OOPs

Andrew Haley aph at redhat.com
Fri Nov 8 08:27:01 PST 2013


In most cases there is no base or shift associated with compressed
OOPs, so we can use them directly to form addresses.  This means that
in the default case compressed OOPs are free: we simply treat a narrow
OOP as an address and use it.  We can get away with this because all
loads of narrow OOPs zero-extend the value into a 64-bit register, so
we can assume that the upper half of a register that contains a narrow
OOP is clean.
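
For example, when the heap base is NULL and the shift is zero, a load
of a reference field followed by a load through that reference needs
no decode step at all.  A sketch (register numbers and field offsets
here are made up for illustration):

    ldr  w1, [x0, #12]    // load narrow OOP field; zero-extends into x1
    ldr  w2, [x1, #16]    // use x1 directly as the base address

With a nonzero shift we would first have to materialize the full
address with an lsl, and with a nonzero heap base we would also need
an add of rheapbase guarded by a null check, since a zero narrow OOP
must stay NULL.  The new memory operands in the patch below let the
matcher fold the zero-base, zero-shift case straight into the
addressing mode, so the DecodeN node costs nothing.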

I've also taken the opportunity to revamp the compressed-OOP code in
a few other places.  Tested with a variety of heap sizes using Eclipse.
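
One detail worth noting: decode_heap_oop now takes separate
destination and source registers, so in the zero-base case the old
mov-then-shift pair collapses into a single instruction (illustrative
registers, with #3 standing in for the OOP shift):

    // before:  mov  x2, x1
    //          lsl  x2, x2, #3
    // after:   lsl  x2, x1, #3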

Andrew.



comparing with ssh://hg.openjdk.java.net/aarch64-port/jdk8//hotspot
searching for changes
changeset:   6075:ff0cd72b8121
tag:         tip
user:        aph
date:        Fri Nov 08 16:15:17 2013 +0000
summary:     Optimize handling of compressed OOPs

diff -r efdf6da15ac0 -r ff0cd72b8121 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 06 17:28:27 2013 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri Nov 08 16:15:17 2013 +0000
@@ -1608,9 +1608,7 @@
 // Implicit_null_check optimization moves the Decode along with the
 // memory operation back up before the NullCheck.
 bool Matcher::narrow_oop_use_complex_address() {
-// TODO
-// decide whether we need to set this to true
-  return false;
+  return Universe::narrow_oop_shift() == 0;
 }

 bool Matcher::narrow_klass_use_complex_address() {
@@ -1784,11 +1782,20 @@
 			 Register base, int index, int size, int disp)
   {
     Address::extend scale;
-    if (opcode == INDINDEXSCALEDOFFSETI2L
-	|| opcode == INDINDEXSCALEDI2L)
+
+    // Hooboy, this is fugly.  We need a way to communicate to the
+    // encoder that the index needs to be sign extended, so we have to
+    // enumerate all the cases.
+    switch (opcode) {
+    case INDINDEXSCALEDOFFSETI2L:
+    case INDINDEXSCALEDI2L:
+    case INDINDEXSCALEDOFFSETI2LN:
+    case INDINDEXSCALEDI2LN:
       scale = Address::sxtw(size);
-    else
+      break;
+    default:
       scale = Address::lsl(size);
+    }

     if (index == -1) {
       (masm.*insn)(reg, Address(base, disp));
@@ -1807,13 +1814,19 @@
 			 Register base, int index, int size, int disp)
   {
     Address::extend scale;
-    if (opcode == INDINDEXSCALEDOFFSETI2L
-	|| opcode == INDINDEXSCALEDI2L)
+
+    switch (opcode) {
+    case INDINDEXSCALEDOFFSETI2L:
+    case INDINDEXSCALEDI2L:
+    case INDINDEXSCALEDOFFSETI2LN:
+    case INDINDEXSCALEDI2LN:
       scale = Address::sxtw(size);
-    else
+      break;
+    default:
       scale = Address::lsl(size);
-
-    if (index == -1) {
+    }
+
+    if (index == -1) {
       (masm.*insn)(reg, Address(base, disp));
     } else {
       if (disp == 0) {
@@ -4118,6 +4131,7 @@
 %}

 //----------Memory Operands----------------------------------------------------
+
 operand indirect(iRegP reg)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -4244,6 +4258,144 @@
   %}
 %}

+
+operand indirectN(iRegN reg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(DecodeN reg);
+  op_cost(0);
+  format %{ "[$reg]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledOffsetIN(iRegN reg, iRegL lreg, immIScale scale, immIAddSub off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+  op_cost(DEFAULT_COST);
+  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetLN(iRegN reg, iRegL lreg, immIScale scale, immLAddSub off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
+  op_cost(DEFAULT_COST);
+  format %{ "$reg, $lreg lsl($scale), $off\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLAddSub off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale)) off);
+  op_cost(DEFAULT_COST);
+  format %{ "$reg, $ireg sxtw($scale), $off I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp($off);
+  %}
+%}
+
+operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
+  op_cost(0);
+  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) (LShiftL lreg scale));
+  op_cost(0);
+  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale($scale);
+    disp(0x0);
+  %}
+%}
+
+operand indIndexN(iRegN reg, iRegL lreg)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) lreg);
+  op_cost(0);
+  format %{ "$reg, $lreg\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($lreg);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indOffIN(iRegN reg, immIAddSub off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+operand indOffLN(iRegN reg, immLAddSub off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (DecodeN reg) off);
+  op_cost(0);
+  format %{ "[$reg, $off]\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0xffffffff);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
+
+
 // AArch64 opto stubs need to write to the pc slot in the thread anchor
 operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
 %{
@@ -4407,7 +4559,8 @@
 // memory is used to define read/write location for load/store
 // instruction defs. we can turn a memory op into an Address

-opclass memory(indirect, indIndexScaledOffsetI,  indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL);
+opclass memory(indirect, indIndexScaledOffsetI,  indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
+	       indirectN, indIndexScaledOffsetIN,  indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);

 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
 // operations. it allows the src to be either an iRegI or a (ConvL2I
@@ -5854,16 +6007,13 @@
 instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
   match(Set dst (EncodeP src));
+  effect(KILL cr);
   ins_cost(DEFAULT_COST * 3);
-  effect(KILL cr);
   format %{ "encode_heap_oop $dst, $src" %}
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
-    if (s != d) {
-      __ mov(d, s);
-    }
-    __ encode_heap_oop(d);
+    __ encode_heap_oop(d, s);
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -5871,7 +6021,6 @@
 instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
   predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
   match(Set dst (EncodeP src));
-  effect(KILL cr);
   ins_cost(DEFAULT_COST * 3);
   format %{ "encode_heap_oop_not_null $dst, $src" %}
   ins_encode %{
@@ -5884,18 +6033,12 @@
   predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
             n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
   match(Set dst (DecodeN src));
-  effect(KILL cr);
   ins_cost(DEFAULT_COST * 3);
   format %{ "decode_heap_oop $dst, $src" %}
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
-    if (s != d) {
-    // TODO
-    // think this should be a movw but Intel uses movq. why?
-      __ mov(d, s);
-    }
-    __ decode_heap_oop(d);
+    __ decode_heap_oop(d, s);
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -5904,17 +6047,12 @@
   predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
             n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
   match(Set dst (DecodeN src));
-  effect(KILL cr);
   ins_cost(DEFAULT_COST * 3);
   format %{ "decode_heap_oop_not_null $dst, $src" %}
   ins_encode %{
     Register s = $src$$Register;
     Register d = $dst$$Register;
-    if (s != d) {
-      __ decode_heap_oop_not_null(d, s);
-    } else {
-      __ decode_heap_oop_not_null(d);
-    }
+    __ decode_heap_oop_not_null(d, s);
   %}
   ins_pipe(pipe_class_default);
 %}
diff -r efdf6da15ac0 -r ff0cd72b8121 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Nov 06 17:28:27 2013 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri Nov 08 16:15:17 2013 +0000
@@ -2019,26 +2019,26 @@
     mov(dst, src);
 }

-void  MacroAssembler::decode_heap_oop(Register r) {
+void  MacroAssembler::decode_heap_oop(Register d, Register s) {
 #ifdef ASSERT
   verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?");
 #endif
   if (Universe::narrow_oop_base() == NULL) {
-    if (Universe::narrow_oop_shift() != 0) {
-      assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
-      lsl(r, r, LogMinObjAlignmentInBytes);
+    if (Universe::narrow_oop_shift() != 0 || d != s) {
+      lsl(d, s, Universe::narrow_oop_shift());
     }
   } else {
     Label done;
-    cbz(r, done);
-    add(r, rheapbase, r, Assembler::LSL, LogMinObjAlignmentInBytes);
+    if (d != s)
+      mov(d, s);
+    cbz(s, done);
+    add(d, rheapbase, s, Assembler::LSL, LogMinObjAlignmentInBytes);
     bind(done);
   }
-  verify_oop(r, "broken oop in decode_heap_oop");
+  verify_oop(d, "broken oop in decode_heap_oop");
 }

 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
-  // Note: it will change flags
   assert (UseCompressedOops, "should only be used for compressed headers");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
@@ -2057,7 +2057,6 @@
 }

 void  MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
-  // Note: it will change flags
   assert (UseCompressedOops, "should only be used for compressed headers");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   // Cannot assert, unverified entry point counts instructions (see .ad file)
diff -r efdf6da15ac0 -r ff0cd72b8121 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Nov 06 17:28:27 2013 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri Nov 08 16:15:17 2013 +0000
@@ -710,7 +710,8 @@

   void encode_heap_oop(Register d, Register s);
   void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
-  void decode_heap_oop(Register r);
+  void decode_heap_oop(Register d, Register s);
+  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
   void encode_heap_oop_not_null(Register r);
   void decode_heap_oop_not_null(Register r);
   void encode_heap_oop_not_null(Register dst, Register src);



