[aarch64-port-dev ] population count intrinsic performance
Alexeev, Alexander
Alexander.Alexeev at caviumnetworks.com
Thu Jun 11 08:10:30 UTC 2015
> But you need to let the register allocator know!
This is the main reason why I called this patch preliminary and it was a mistake to neglect that.
Now it is clear.
After applying the recommended changes, the results for both versions are the same.
Baseline:
Benchmark Mode Cnt Score Error Units
BitCount.bitCountInteger avgt 5 11.004 ± 0.000 ns/op
BitCount.bitCountLong avgt 5 11.005 ± 0.000 ns/op
SIMD version:
Benchmark Mode Cnt Score Error Units
BitCount.bitCountInteger avgt 5 11.004 ± 0.001 ns/op
BitCount.bitCountLong avgt 5 11.004 ± 0.000 ns/op
Updated patch is below.
--- CUT HERE ---
diff -r 93cc4d7535ce src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Wed Jun 10 12:29:07 2015 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad Thu Jun 11 07:28:28 2015 +0000
@@ -7402,6 +7402,55 @@
ins_pipe(ialu_reg);
%}
+//---------- Population Count Instructions -------------------------------------
+//
+// Both rules below copy the source into a SIMD register, use CNT to count the
+// set bits of each of its eight bytes, use ADDV to sum those eight counts, and
+// copy the sum back to a general-purpose register.
+
+// Integer.bitCount(int).
+// src may be an int or the low half of a long (iRegIorL2I), so bits 63:32 of
+// the source register must not be counted. fmovs (FMOV Sd, Wn) transfers only
+// the low 32 bits and clears the rest of tmp, without modifying src.
+instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegD tmp) %{
+  match(Set dst (PopCountI src));
+  effect(TEMP tmp);
+  ins_cost(INSN_COST * 13);
+
+  format %{ "fmovs  $tmp, $src\t# low 32 bits, rest zeroed\n\t"
+            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
+            "addv   $tmp, $tmp\t# vector (8B)\n\t"
+            "mov    $dst, $tmp\t# vector (1D)" %}
+  ins_encode %{
+    __ fmovs($tmp$$FloatRegister, as_Register($src$$reg));
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ mov(as_Register($dst$$reg), $tmp$$FloatRegister, __ T1D, 0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
+  match(Set dst (PopCountL src));
+  effect(TEMP tmp);
+  ins_cost(INSN_COST * 13);
+
+  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
+            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
+            "addv   $tmp, $tmp\t# vector (8B)\n\t"
+            "mov    $dst, $tmp\t# vector (1D)" %}
+  ins_encode %{
+    __ mov($tmp$$FloatRegister, __ T1D, 0, as_Register($src$$reg));
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
+    __ mov(as_Register($dst$$reg), $tmp$$FloatRegister, __ T1D, 0);
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
// ============================================================================
// MemBar Instruction
diff -r 93cc4d7535ce src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Wed Jun 10 12:29:07 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Jun 11 07:28:28 2015 +0000
@@ -2050,6 +2050,9 @@
INSN(negr, 1, 0b100000101110);
INSN(notr, 1, 0b100000010110);
INSN(addv, 0, 0b110001101110);
+ INSN(cls, 0, 0b100000010010);
+ INSN(clz, 1, 0b100000010010);
+ INSN(cnt, 0, 0b100000010110);
#undef INSN
diff -r 93cc4d7535ce src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Wed Jun 10 12:29:07 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Jun 11 07:28:28 2015 +0000
@@ -36,6 +36,7 @@
class MacroAssembler: public Assembler {
friend class LIR_Assembler;
+ public:
using Assembler::mov;
using Assembler::movi;
--- CUT HERE ---
Regards,
Alexander
More information about the aarch64-port-dev
mailing list