[aarch64-port-dev ] RFR: Optimise addressing of card table byte map base
Edward Nevill
ed at camswl.com
Mon May 12 16:47:07 UTC 2014
On Mon, 2014-05-12 at 14:29 +0100, Andrew Haley wrote:
> On 05/12/2014 02:21 PM, Edward Nevill wrote:
> > + enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
> > + MacroAssembler _masm(&cbuf);
> > + address page = (address)$src$$constant;
> > + Register dst_reg = as_Register($dst$$reg);
> > + unsigned long off;
> > + __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
>
> Is this reloc type correct? It should be an external address, no?
Yes. An ExternalAddress is correct. Thanks for spotting this. Modified as follows:-
--- a/src/cpu/aarch64/vm/aarch64.ad Mon May 12 14:09:27 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad Mon May 12 17:24:33 2014 +0100
@@ -2570,7 +2570,7 @@
address page = (address)$src$$constant;
Register dst_reg = as_Register($dst$$reg);
unsigned long off;
- __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
+ __ adrp(dst_reg, ExternalAddress(page), off);
assert(off == 0, "assumed offset == 0");
%}
> What is the cost of this insn?
1 * INSN_CONST for a single data processing insn. This is specified in loadByteMapBase.
instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
%{
match(Set dst con);
ins_cost(INSN_COST);
format %{ "adr $dst, $con\t# Byte Map Base" %}
ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
ins_pipe(pipe_class_default);
%}
> > diff -r 6523308f9626 -r 1a6e4b95d268 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
> > --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon May 12 13:41:43 2014 +0100
> > +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon May 12 14:09:27 2014 +0100
> > @@ -94,7 +94,9 @@
> > offset = adr_page - pc_page;
> >
> > unsigned insn2 = ((unsigned*)branch)[1];
> > - if ((address)target == os::get_polling_page()) {
> > + if ((jbyte *)target ==
> > + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
> > + (address)target == os::get_polling_page()) {
> > assert(offset_lo == 0, "offset must be 0 for polling page");
> > } else if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001) {
> > // Load/store register (unsigned immediate)
> > @@ -182,7 +184,9 @@
> > uint64_t target_page = ((uint64_t)insn_addr) + offset;
> > target_page &= ((uint64_t)-1) << shift;
> > unsigned insn2 = ((unsigned*)insn_addr)[1];
> > - if ((address)target_page == os::get_polling_page()) {
> > + if ((jbyte *)target_page ==
> > + ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
> > + (address)target_page == os::get_polling_page()) {
> > return (address)target_page;
> > } else if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001) {
> > // Load/store register (unsigned immediate)
>
> I'm getting rather unhappy about these special cases in the reloc code.
Understood. All of the special case code is really only there to support the assert()/ShouldNotReachHere() logic (IE. What it does is ensures that if we find an adrp on its own (IE not followed by a ldr/str/add instruction) then we assert that it is trying to address either the polling page, or the byte map base page (we also of course assert that the offset is 0)).
It all becomes much clearer (and shorter!) if rewritten as below.
Does this ease your concerns?
Ed.
diff -r 1a6e4b95d268 src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon May 12 14:09:27 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon May 12 17:40:30 2014 +0100
@@ -94,6 +94,7 @@
offset = adr_page - pc_page;
unsigned insn2 = ((unsigned*)branch)[1];
+#if 0
if ((jbyte *)target ==
((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
(address)target == os::get_polling_page()) {
@@ -135,6 +136,44 @@
} else {
ShouldNotReachHere();
}
+#else
+ // We handle 3 types of PC relative addressing
+ // 1 - adrp Rx, target_page
+ // ldr/str Ry, [Rx, #offset_in_page]
+ // 2 - adrp Rx, target_page
+ // add Ry, Rx, #offset_in_page
+ // 3 - adrp Rx, target_page (page aligned reloc, offset == 0)
+ // In the first 2 cases we must check that Rx is the same in the adrp and the
+ // subsequent ldr/str or add instruction. Otherwise we could accidentally end
+ // up treating a type 3 relocation as a type 1 or 2 just because it happened
+ // to be followed by a random unrelated ldr/str or add instruction.
+ //
+ // In the case of a type 3 relocation, we know that these are only generated
+ // for the safepoint polling page, or for the card type byte map base so we
+ // assert as much and of course that the offset is 0.
+ //
+ if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
+ Instruction_aarch64::extract(insn, 4, 0) ==
+ Instruction_aarch64::extract(insn2, 9, 5)) {
+ // Load/store register (unsigned immediate)
+ unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
+ Instruction_aarch64::patch(branch + sizeof (unsigned),
+ 21, 10, offset_lo >> size);
+ guarantee(((dest >> size) << size) == dest, "misaligned target");
+ } else if (Instruction_aarch64::extract(insn2, 31, 22) == 0b1001000100 &&
+ Instruction_aarch64::extract(insn, 4, 0) ==
+ Instruction_aarch64::extract(insn2, 4, 0)) {
+ // add (immediate)
+ Instruction_aarch64::patch(branch + sizeof (unsigned),
+ 21, 10, offset_lo);
+ } else {
+ assert((jbyte *)target ==
+ ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
+ (address)target == os::get_polling_page(),
+ "adrp must be polling page or byte map base");
+ assert(offset_lo == 0, "offset must be 0 for polling page or byte map base");
+ }
+#endif
}
int offset_lo = offset & 3;
offset >>= 2;
@@ -184,6 +223,7 @@
uint64_t target_page = ((uint64_t)insn_addr) + offset;
target_page &= ((uint64_t)-1) << shift;
unsigned insn2 = ((unsigned*)insn_addr)[1];
+#if 0
if ((jbyte *)target_page ==
((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
(address)target_page == os::get_polling_page()) {
@@ -196,6 +236,40 @@
} else {
ShouldNotReachHere();
}
+#else
+ // Return the target address for the following sequences
+ // 1 - adrp Rx, target_page
+ // ldr/str Ry, [Rx, #offset_in_page]
+ // [ 2 - adrp Rx, target_page ] Not handled
+ // [ add Ry, Rx, #offset_in_page ]
+ // 3 - adrp Rx, target_page (page aligned reloc, offset == 0)
+ //
+ // In the case of type 1 we check that the register is the same and
+ // return the target_page + the offset within the page.
+ //
+ // Otherwise we assume it is a page aligned relocation and return
+ // the target page only. The only cases this is generated is for
+ // the safepoint polling page or for the card table byte map base so
+ // we assert as much.
+ //
+ // Note: Strangely, we do not handle 'type 2' relocation (adrp followed
+ // by add) which is handled in pd_patch_instruction above.
+ //
+ if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001 &&
+ Instruction_aarch64::extract(insn, 4, 0) ==
+ Instruction_aarch64::extract(insn2, 9, 5)) {
+ // Load/store register (unsigned immediate)
+ unsigned int byte_offset = Instruction_aarch64::extract(insn2, 21, 10);
+ unsigned int size = Instruction_aarch64::extract(insn2, 31, 30);
+ return address(target_page + (byte_offset << size));
+ } else {
+ assert((jbyte *)target_page ==
+ ((CardTableModRefBS*)(Universe::heap()->barrier_set()))->byte_map_base ||
+ (address)target_page == os::get_polling_page(),
+ "adrp must be polling page or byte map base");
+ return (address)target_page;
+ }
+#endif
} else {
ShouldNotReachHere();
}
More information about the aarch64-port-dev
mailing list