[aarch64-port-dev ] HasNegatives
Stuart Monteith
stuart.monteith at linaro.org
Wed Jul 19 22:57:31 UTC 2017
Hello,
Please find below my latest version of the has negatives implementation.
It passes JTregs, and the HasNegatives regression test looks adequate
for finding errors.
I understand the Bellsoft guys have a patch in the works too - so
it'll be best to compare their version once it is uploaded, and we can
perhaps compare how they run.
The results I get in JMH are:
Platform A:
Benchmark (length) Mode Cnt Score Error Units
HasNegatives.loopingFastMethod 4 avgt 50 3823.895 ± 0.264 ns/op
HasNegatives.loopingFastMethod 31 avgt 50 7977.361 ± 141.724 ns/op
HasNegatives.loopingFastMethod 65 avgt 50 12303.588 ± 100.645 ns/op
HasNegatives.loopingFastMethod 101 avgt 50 14464.835 ± 126.982 ns/op
HasNegatives.loopingFastMethod 256 avgt 50 38142.723 ± 3.266 ns/op
HasNegatives.steamFastMethod 4 avgt 50 6208.206 ± 0.401 ns/op
HasNegatives.steamFastMethod 31 avgt 50 23370.868 ± 1.337 ns/op
HasNegatives.steamFastMethod 65 avgt 50 52450.499 ± 6.449 ns/op
HasNegatives.steamFastMethod 101 avgt 50 61013.218 ± 73.249 ns/op
HasNegatives.steamFastMethod 256 avgt 50 159738.530 ± 12.301 ns/op
Platform B:
Benchmark (length) Mode Cnt Score
Error Units
HasNegatives.loopingFastMethod 4 avgt 50 6509.262 ±
0.058 ns/op
HasNegatives.loopingFastMethod 31 avgt 50 19014.390 ±
0.209 ns/op
HasNegatives.loopingFastMethod 65 avgt 50 26918.376 ±
150.078 ns/op
HasNegatives.loopingFastMethod 101 avgt 50 33021.181 ±
0.306 ns/op
HasNegatives.loopingFastMethod 256 avgt 50 58033.081 ±
0.349 ns/op
HasNegatives.steamFastMethod 4 avgt 50 11511.223 ±
0.142 ns/op
HasNegatives.steamFastMethod 31 avgt 50 56736.881 ±
419.531 ns/op
HasNegatives.steamFastMethod 65 avgt 50 77044.199 ±
0.720 ns/op
HasNegatives.steamFastMethod 101 avgt 50 119524.822 ±
1042.359 ns/op
HasNegatives.steamFastMethod 256 avgt 50 276267.518 ±
2.961 ns/op
Platform C:
Benchmark (length) Mode Cnt Score Error Units
HasNegatives.loopingFastMethod 4 avgt 50 5013.519 ± 0.229 ns/op
HasNegatives.loopingFastMethod 31 avgt 50 9186.669 ± 0.482 ns/op
HasNegatives.loopingFastMethod 65 avgt 50 12938.151 ± 0.558 ns/op
HasNegatives.loopingFastMethod 101 avgt 50 15689.530 ± 101.980 ns/op
HasNegatives.loopingFastMethod 256 avgt 50 26903.405 ± 104.391 ns/op
HasNegatives.steamFastMethod 4 avgt 50 9185.878 ± 0.326 ns/op
HasNegatives.steamFastMethod 31 avgt 50 45037.950 ± 1.993 ns/op
HasNegatives.steamFastMethod 65 avgt 50 87140.886 ± 4.777 ns/op
HasNegatives.steamFastMethod 101 avgt 50 119742.041 ± 71.852 ns/op
HasNegatives.steamFastMethod 256 avgt 50 291104.970 ± 18.563 ns/op
The patch:
# HG changeset patch
# User smonteith
# Date 1499083520 -3600
# Mon Jul 03 13:05:20 2017 +0100
# Node ID f138dbd8e5da70b4b4a1b384d6cae7954ab91f38
# Parent 38018e6e25b3180591c01d80c623f60c17009a9e
[mq]: has_neg
diff -r 38018e6e25b3 -r f138dbd8e5da src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Thu Jul 13 01:28:24 2017 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad Mon Jul 03 13:05:20 2017 +0100
@@ -15747,6 +15747,20 @@
ins_pipe(pipe_class_memory);
%}
+// fast detection of a negative byte in byte[]
+instruct has_negatives(iRegP_R1 array, iRegI_R2 len, iRegI_R0 result,
iRegI tmp1,
+ iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
+%{
+ match(Set result (HasNegatives array len));
+ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL array,
USE_KILL len, KILL cr);
+
+ format %{ "has negatives byte[] $array,$len -> $result // KILL
$array, $len" %}
+ ins_encode %{
+ __ has_negatives($array$$Register, $len$$Register, $result$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
// fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
diff -r 38018e6e25b3 -r f138dbd8e5da
src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Jul 13
01:28:24 2017 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Jul 03
13:05:20 2017 +0100
@@ -4829,6 +4829,129 @@
BLOCK_COMMENT("} string_compare");
}
+// java.lang.StringCoding.hasNegatives intrinsic.
+// Returns 1 in "result" when one or more bytes in "array" of length "len" has
+// its top bit set.
+void MacroAssembler::has_negatives(Register array, Register len,
Register result,
+ Register tmp1, Register tmp2, Register tmp3,
+ Register tmp4)
+{
+ // The top bits of every byte in a 64-bit constant.
+ const uint64_t UPPER_BIT_MASK=0x8080808080808080;
+ Label EXIT_REAL, EXIT_FALSE, EXIT_TRUE, LONGER, LOOP16, LAST8, TAIL;
+
+ BLOCK_COMMENT("has_negatives {");
+
+ // If there are 0 or less bytes, no negative bytes were found.
+ cmpw(len, 0);
+ br(Assembler::LE, EXIT_FALSE);
+
+ // Unaligned load of first 64-bits. Used in LONGER and for testing 8 bytes
+ // or less on fallthrough.
+ ldr(tmp1, Address(array));
+
+ cmp(len, 8);
+ br(Assembler::GT, LONGER);
+
+ // Test 8 bytes or less
+
+ // tmp1 loaded 8 bytes from the array. Shift off bytes that are past the
+ // end of the string.
+ mov(tmp2, 8);
+ sub(tmp3, tmp2, len);
+ lsl(tmp3, tmp3, 3);
+ lslv(tmp4, tmp1, tmp3);
+
+ tst(tmp4, UPPER_BIT_MASK);
+ br(Assembler::NE, EXIT_TRUE);
+ b(EXIT_FALSE);
+
+ // Test against any string over 8 bytes in length.
+ BIND(LONGER);
+
+ // Unconditionally test the first 64-bits.
+ tst(tmp1, UPPER_BIT_MASK);
+ br(Assembler::NE, EXIT_TRUE);
+
+ // Align start of array to next 8-byte aligned boundary and set the
+ // remaining length accordingly.
+ mov(tmp2, 8);
+ andw(tmp4, array, 7);
+ sub(tmp3, tmp2, tmp4);
+
+ add(array, array, tmp3);
+ sub(len, len, tmp3);
+
+ cmpw(len, 16);
+ br(Assembler::LT, LAST8);
+
+ // Align array on 16 byte boundary, if necessary.
+ tst(array, 8);
+ br(Assembler::EQ, LOOP16);
+
+ ldr(tmp1, Address(post(array, 8)));
+ tst(tmp1, UPPER_BIT_MASK);
+ br(Assembler::NE, EXIT_TRUE);
+
+ sub(len, len, 8);
+
+ cmpw(len, 16);
+ br(Assembler::LT, LAST8);
+
+ // Loop to test 16-byte chunks.
+ BIND(LOOP16);
+
+ ldp(tmp1, tmp2, Address(post(array, 16)));
+ orr(tmp3, tmp1, tmp2);
+
+ tst(tmp3, UPPER_BIT_MASK);
+ br(Assembler::NE, EXIT_TRUE);
+
+ sub(len, len, 16);
+
+ cmpw(len, 16);
+ br(Assembler::GE, LOOP16);
+
+ // Handle the last whole 8 bytes.
+ BIND(LAST8);
+
+ cmpw(len, 8);
+ br(Assembler::LE, TAIL);
+ ldr(tmp1, Address(post(array,8)));
+
+ tst(tmp1, UPPER_BIT_MASK);
+ br(Assembler::NE, EXIT_TRUE);
+
+ sub(len, len, 8);
+
+ // Handle the last bytes - may be 8 or less bytes.
+ bind(TAIL);
+
+ cbz(len, EXIT_FALSE);
+
+ // Align end of the 64-bit load with the last byte.
+ mov(tmp1, 8);
+ subw(tmp3, tmp1, len);
+ sub(array, array, tmp3);
+
+ ldr(tmp1, Address(array));
+ tst(tmp1, UPPER_BIT_MASK);
+ br(Assembler::NE, EXIT_TRUE);
+
+ BIND(EXIT_FALSE);
+ mov(result, 0);
+
+ b(EXIT_REAL);
+
+ BIND(EXIT_TRUE);
+
+ mov(result, 1);
+
+ BIND(EXIT_REAL);
+
+ BLOCK_COMMENT("} has_negatives");
+}
+
// Compare Strings or char/byte arrays.
// is_string is true iff this is a string comparison.
diff -r 38018e6e25b3 -r f138dbd8e5da
src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Jul 13
01:28:24 2017 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Mon Jul 03
13:05:20 2017 +0100
@@ -1209,6 +1209,9 @@
Register tmp1,
FloatRegister vtmp, FloatRegister vtmpZ, int ae);
+ void has_negatives(Register array, Register len, Register result,
+ Register tmp1, Register tmp2, Register tmp3,
Register tmp4);
+
void arrays_equals(Register a1, Register a2,
Register result, Register cnt1,
int elem_size, bool is_string);
More information about the aarch64-port-dev
mailing list