[aarch64-port-dev ] Missing concurrency barriers and other bugs.
Andrew Haley
aph at redhat.com
Wed Oct 1 18:23:12 UTC 2014
This is a bunch of fixes.
First, we were using str and strw almost randomly when writing
JavaThread::thread_state. And, we weren't using any barriers, which
we seem to need.
MacroAssembler::serialize_memory was wrong; it now does what other
ports do.
The use_XOR_for_compressed_class_base was executed every time an
instance of Assembler was constructed, but it only needs to be
executed once.
Andrew.
# HG changeset patch
# User aph
# Date 1412186700 14400
# Wed Oct 01 14:05:00 2014 -0400
# Node ID f77a6f417a54aeb12cedc962a753fb81b6dab958
# Parent b43b612fe37291952d164b7e3dcee6ae760ade42
Missing concurrency barriers and other bugs.
JavaThread::thread_state accesses need barriers.
Correct implementation of serialize_memory().
Reduce the overhead of checking for use_XOR_for_compressed_class_base.
diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Wed Oct 01 14:05:00 2014 -0400
@@ -64,6 +64,9 @@
#define STOP(error) block_comment(error); stop(error)
#endif
+bool MacroAssembler::_use_XOR_for_compressed_class_base;
+address MacroAssembler::_narrow_klass_base;
+
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
void MacroAssembler::pd_patch_instruction(address branch, address target) {
@@ -107,8 +110,8 @@
// up treating a type 3 relocation as a type 1 or 2 just because it happened
// to be followed by a random unrelated ldr/str or add instruction.
//
- // In the case of a type 3 relocation, we know that these are only generated
- // for the safepoint polling page, or for the card type byte map base so we
+ // In the case of a type 3 relocation, we know that these are
+ // only generated for a few well-known pages addresses so we
// assert as much and of course that the offset is 0.
//
unsigned insn2 = ((unsigned*)branch)[1];
@@ -130,7 +133,8 @@
assert((jbyte *)target ==
((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
target == StubRoutines::crc_table_addr() ||
- (address)target == os::get_polling_page(),
+ target == os::get_polling_page() ||
+ target == os::get_memory_serialize_page(),
"adrp must be polling page or byte map base");
assert(offset_lo == 0, "offset must be 0 for polling page or byte map base");
}
@@ -250,8 +254,16 @@
return address(((uint64_t)insn_addr + (offset << 2)));
}
-void MacroAssembler::serialize_memory(Register thread, Register tmp) {
- dsb(Assembler::SY);
+void MacroAssembler::serialize_memory(Register tmp1, Register tmp2) {
+ andr(tmp1, rthread, (os::vm_page_size() - sizeof(int)) << os::get_serialize_page_shift_count());
+
+ unsigned long offset;
+ adrp(tmp2, os::get_memory_serialize_page(), offset);
+ guarantee(offset == 0, "should be a page address");
+
+ // Size of store must match masking code above
+ add(tmp1, tmp2, tmp1, LSR, os::get_serialize_page_shift_count());
+ stlrw(tmp2, tmp1);
}
@@ -2678,7 +2690,7 @@
return;
}
- if (use_XOR_for_compressed_class_base) {
+ if (_use_XOR_for_compressed_class_base) {
if (Universe::narrow_klass_shift() != 0) {
eor(dst, src, (uint64_t)Universe::narrow_klass_base());
lsr(dst, dst, LogKlassAlignmentInBytes);
@@ -2727,7 +2739,7 @@
return;
}
- if (use_XOR_for_compressed_class_base) {
+ if (_use_XOR_for_compressed_class_base) {
if (Universe::narrow_klass_shift() != 0) {
lsl(dst, src, LogKlassAlignmentInBytes);
eor(dst, dst, (uint64_t)Universe::narrow_klass_base());
diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Oct 01 14:05:00 2014 -0400
@@ -91,16 +91,19 @@
void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);
- // Maximum size of class area in Metaspace when compressed
- uint64_t use_XOR_for_compressed_class_base;
+ static bool _use_XOR_for_compressed_class_base;
+ static address _narrow_klass_base;
public:
MacroAssembler(CodeBuffer* code) : Assembler(code) {
- use_XOR_for_compressed_class_base
- = (operand_valid_for_logical_immediate(false /*is32*/,
- (uint64_t)Universe::narrow_klass_base())
- && ((uint64_t)Universe::narrow_klass_base()
- > (1u << log2_intptr(CompressedClassSpaceSize))));
+ if (Universe::narrow_klass_base() != _narrow_klass_base) {
+ _use_XOR_for_compressed_class_base
+ = (operand_valid_for_logical_immediate(false /*is32*/,
+ (uint64_t)Universe::narrow_klass_base())
+ && ((uint64_t)Universe::narrow_klass_base()
+ > (1u << log2_intptr(CompressedClassSpaceSize))));
+ _narrow_klass_base = Universe::narrow_klass_base();
+ }
}
// Biased locking support
@@ -877,7 +880,10 @@
int offset);
// Support for serializing memory accesses between threads
- void serialize_memory(Register thread, Register tmp);
+ void serialize_memory(Register tmp1, Register tmp2);
+
+ void release() { membar(LoadStore|StoreStore); }
+ void acquire() { membar(LoadLoad|LoadStore); }
// Arithmetics
diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp Wed Oct 01 14:05:00 2014 -0400
@@ -1872,7 +1872,8 @@
// Now set thread in native
__ mov(rscratch1, _thread_in_native);
- __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+ __ release();
+ __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
{
int return_type = 0;
@@ -1929,18 +1930,20 @@
// Thread A is resumed to finish this native method, but doesn't block here since it
// didn't see any synchronization is progress, and escapes.
__ mov(rscratch1, _thread_in_native_trans);
- __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+ __ release();
+ __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
if(os::is_MP()) {
if (UseMembar) {
// Force this write out before the read below
- __ dmb(Assembler::SY);
+ __ membar(Assembler::LoadLoad | Assembler::LoadStore |
+ Assembler::StoreLoad | Assembler::StoreStore);
} else {
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
- __ serialize_memory(rthread, r2);
+ __ serialize_memory(rscratch1, r2);
}
}
@@ -1993,7 +1996,8 @@
// change thread state
__ mov(rscratch1, _thread_in_Java);
- __ str(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+ __ release();
+ __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
__ bind(after_transition);
Label reguard;
diff -r b43b612fe372 -r f77a6f417a54 src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Oct 01 12:51:45 2014 -0400
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp Wed Oct 01 14:05:00 2014 -0400
@@ -1031,6 +1031,7 @@
#endif
// Change state to native
+ __ release();
__ mov(rscratch1, _thread_in_native);
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
@@ -1052,19 +1053,21 @@
__ push(ltos);
// change thread state
+ __ release();
__ mov(rscratch1, _thread_in_native_trans);
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
if (os::is_MP()) {
if (UseMembar) {
// Force this write out before the read below
- __ dsb(Assembler::SY);
+ __ membar(Assembler::LoadLoad | Assembler::LoadStore |
+ Assembler::StoreLoad | Assembler::StoreStore);
} else {
// Write serialization page so VM thread can do a pseudo remote membar.
// We use the current thread pointer to calculate a thread specific
// offset to write to within the page. This minimizes bus traffic
// due to cache line collision.
- __ serialize_memory(rthread, rscratch2);
+ __ serialize_memory(rscratch1, rscratch2);
}
}
@@ -1101,6 +1104,7 @@
}
// change thread state
+ __ release();
__ mov(rscratch1, _thread_in_Java);
__ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
More information about the aarch64-port-dev
mailing list