[aarch64-port-dev] Allow cmpxchg to fall through when it fails
Andrew Haley
aph at redhat.com
Wed Oct 9 05:53:15 PDT 2013
This is a small optimization. Rather than unconditionally branching when
a cmpxchg fails, callers can now pass a null failure label and simply
fall through, which saves a branch in the failure case.
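For illustration, here is the effect on a typical call site (a minimal
before/after sketch; the register names follow the comments in
macroAssembler_aarch64.cpp rather than any particular caller):

    // Before: both outcomes required a label, so even a failure path
    // that immediately followed the cmpxchg cost a branch:
    Label succeed, fail;
    cmpxchgptr(oldv, newv, addr, tmp, succeed, fail);
    bind(fail);   // the branch lands on the very next instruction
    // ... failure (slow) path ...

    // After: the failure label is a pointer, and passing NULL lets
    // the failure case fall straight through:
    cmpxchgptr(oldv, newv, addr, tmp, succeed, /*fail*/NULL);
    // ... failure (slow) path ...

Callers that still want an explicit branch on failure just pass the
label's address, as the sharedRuntime and templateTable changes below
do with &slow_path_unlock and &retry.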
Andrew.
# HG changeset patch
# User aph
# Date 1381322616 -3600
# Node ID 10b833f09e6a617fbc95fb3ca648bf0bc2c9b39e
# Parent 557a6ed9e5d03270183926ca313ea72cce2896cf
Allow cmpxchg to fall through when it fails
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad Wed Oct 09 13:43:36 2013 +0100
@@ -3192,7 +3192,7 @@
/*where=*/oop,
/*result=*/tmp,
cont,
- cas_failed);
+ /*fail*/NULL);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// If the compare-and-exchange succeeded, then we found an unlocked
@@ -3227,8 +3227,8 @@
/*exchange_value=*/rthread,
/*where=*/tmp,
/*result=*/rscratch1,
- next,
- next);
+ /*succeed*/next,
+ /*fail*/NULL);
__ bind(next);
// store a non-null value into the box.
@@ -3310,7 +3310,7 @@
/*where=*/oop,
/*result=*/tmp,
cont,
- cas_failed);
+ /*cas_failed*/NULL);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
__ bind(cas_failed);
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp Wed Oct 09 13:43:36 2013 +0100
@@ -96,9 +96,8 @@
// displaced header address in the object header - if it is not the same, get the
// object header instead
lea(rscratch2, Address(obj, hdr_offset));
- cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, fail);
+ cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthrough*/NULL);
// if the object header was the same, we're done
- bind(fail);
// if the object header was not the same, it is now in the hdr register
// => test if it is a stack pointer into the same stack (recursive locking), i.e.:
//
@@ -159,9 +158,9 @@
// we do unlocking via runtime call
if (hdr_offset) {
lea(rscratch1, Address(obj, hdr_offset));
- cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, slow_case);
+ cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
} else {
- cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, slow_case);
+ cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
}
// done
bind(done);
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/interp_masm_aarch64.cpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp Wed Oct 09 13:43:36 2013 +0100
@@ -635,16 +635,16 @@
Label fail;
if (PrintBiasedLockingStatistics) {
Label fast;
- cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, fail);
+ cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
bind(fast);
// cond_inc32(Assembler::zero,
// ExternalAddress((address) BiasedLocking::fast_path_entry_count_addr()));
call_Unimplemented();
b(done);
+ bind(fail);
} else {
- cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, fail);
+ cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
}
- bind(fail);
// Test if the oopMark is an obvious stack pointer, i.e.,
// 1) (mark & 7) == 0, and
@@ -734,9 +734,7 @@
cbz(header_reg, done);
// Atomic swap back the old header
- Label fail;
- cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, fail);
- bind(fail);
+ cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
// Call the runtime routine for slow case.
str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Oct 09 13:43:36 2013 +0100
@@ -1737,7 +1737,7 @@
// register+offset Address.
void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
- Label &succeed, Label &fail) {
+ Label &succeed, Label *fail) {
// oldv holds comparison value
// newv holds value to write in exchange
// addr identifies memory word to compare against/update
@@ -1759,13 +1759,12 @@
// if the memory word differs we return it in oldv and signal a fail
bind(nope);
mov(oldv, tmp);
- // if (fail)
- // b(*fail);
- b(fail);
+ if (fail)
+ b(*fail);
}
void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
- Label &succeed, Label &fail) {
+ Label &succeed, Label *fail) {
// oldv holds comparison value
// newv holds value to write in exchange
// addr identifies memory word to compare against/update
@@ -1787,9 +1786,8 @@
// if the memory word differs we return it in oldv and signal a fail
bind(nope);
mov(oldv, tmp);
- // if (fail)
- // b(*fail);
- b(fail);
+ if (fail)
+ b(*fail);
}
void MacroAssembler::incr_allocated_bytes(Register thread,
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Oct 09 13:43:36 2013 +0100
@@ -1061,10 +1061,10 @@
void cmpptr(Address src1, int32_t src2) { Unimplemented(); }
void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
- Label &suceed, Label &fail);
+ Label &suceed, Label *fail);
void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
- Label &suceed, Label &fail);
+ Label &suceed, Label *fail);
void imulptr(Register dst, Register src) { Unimplemented(); }
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Oct 09 13:43:36 2013 +0100
@@ -1827,8 +1827,7 @@
// src -> dest iff dest == r0 else r0 <- dest
{ Label here;
- __ cmpxchgptr(r0, lock_reg, obj_reg, rscratch1, lock_done, here);
- __ bind(here);
+ __ cmpxchgptr(r0, lock_reg, obj_reg, rscratch1, lock_done, /*fallthrough*/NULL);
}
// Hmm should this move to the slow path code area???
@@ -2033,7 +2032,7 @@
// Atomic swap old header if oop still contains the stack lock
Label succeed;
- __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, slow_path_unlock);
+ __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock);
__ bind(succeed);
// slow path re-enters here
diff -r 557a6ed9e5d0 -r 10b833f09e6a src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp Wed Oct 09 13:32:47 2013 +0100
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp Wed Oct 09 13:43:36 2013 +0100
@@ -3240,7 +3240,7 @@
Label succeed;
// if someone beat us on the allocation, try again, otherwise continue
- __ cmpxchgptr(r0, r1, RtopAddr, rscratch1, succeed, retry);
+ __ cmpxchgptr(r0, r1, RtopAddr, rscratch1, succeed, &retry);
__ bind(succeed);
__ incr_allocated_bytes(rthread, r3, 0, rscratch1);
}