[aarch64-port-dev ] RFR: Optimise store of 0 byte into card table
Edward Nevill
edward.nevill at linaro.org
Fri Aug 29 14:47:17 UTC 2014
Hi,
The following patch optimises the storing of 0 bytes into the card table.
Firstly the existing code was generating
mov wS, zr
stlrb wS, [xN]
because it didn't have a rule for storing 0.
This has been optimised to
stlrb zr, [xN]
Note: I have only done this optimisation for bytes. Should I also do it for 16-, 32- and 64-bit values? How often do these actually occur?
Secondly, if the byte in memory is already 0, it skips the store. In the vast majority of cases the byte is in fact already 0 because the card is already dirty, so this avoids executing unnecessary STLRB instructions.
So it generates
ldrb rScratch, [xN]
cbz rScratch, skip
stlrb zr, [xN]
skip:
This, in combination with the previous patch, yields significant performance improvements for programs that perform many stores of non-volatile oops.
OK to push?
Ed.
Patch also available at http://people.linaro.org/~edward.nevill/patches/memorder.patch in case there is any problem with the formatting below.
--- CUT HERE ---
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1409322430 -3600
# Fri Aug 29 15:27:10 2014 +0100
# Node ID 953a1b5e5b1726470045bfa0dbe1b2bff799b906
# Parent 4aa306297dafb02943645761f2477d0d95c4a157
Optimise store of 0 byte into card table
diff -r 4aa306297daf -r 953a1b5e5b17 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Fri Aug 29 11:12:45 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad Fri Aug 29 15:27:10 2014 +0100
@@ -2160,6 +2160,18 @@
rscratch1, stlrb);
%}
+ // Special case of storing 0 to volatile for storing into card table
+ enc_class aarch64_enc_stlrb0(memory mem) %{
+ Label skip;
+ {
+ MacroAssembler _masm(&cbuf);
+ __ ldrb(rscratch1, as_Register($mem$$base));
+ __ cbz(rscratch1, skip);
+ }
+ MOV_VOLATILE(zr, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, rscratch1, stlrb);
+ __ bind(skip);
+ %}
+
enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
rscratch1, stlrh);
@@ -5909,6 +5921,19 @@
ins_pipe(pipe_class_memory);
%}
+// Special rule for store of 0 byte to volatile for card table
+instruct storeB_volatile_imm0(immI0 zero, /* sync_memory*/indirect mem)
+%{
+ match(Set mem (StoreB mem zero));
+
+ ins_cost(VOLATILE_REF_COST);
+ format %{ "stlrb zr, $mem\t# byte" %}
+
+ ins_encode(aarch64_enc_stlrb0(mem));
+
+ ins_pipe(pipe_class_memory);
+%}
+
// Store Char/Short
instruct storeC_volatile(iRegI src, /* sync_memory*/indirect mem)
%{
diff -r 4aa306297daf -r 953a1b5e5b17 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Aug 29 11:12:45 2014 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Aug 29 15:27:10 2014 +0100
@@ -1081,7 +1081,7 @@
Register Rn, enum operand_size sz, int op, int o0) {
starti;
f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
- rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+ rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), zrf(Rt1, 0);
}
#define INSN4(NAME, sz, op, o0) /* Four registers */ \
--- CUT HERE ---
More information about the aarch64-port-dev
mailing list