[aarch64-port-dev ] population count intrinsic performance

Alexeev, Alexander Alexander.Alexeev at caviumnetworks.com
Thu Jun 11 08:10:30 UTC 2015


> But you need to let the register allocator know!
That is the main reason I called this patch preliminary; neglecting it was a mistake.
Now it is clear.

After applying the recommended changes, the results for both versions are the same.
 
Baseline:
Benchmark                 Mode  Cnt   Score   Error  Units
BitCount.bitCountInteger  avgt    5  11.004 ± 0.000  ns/op
BitCount.bitCountLong     avgt    5  11.005 ± 0.000  ns/op

SIMD version:
Benchmark                 Mode  Cnt   Score   Error  Units
BitCount.bitCountInteger  avgt    5  11.004 ± 0.001  ns/op
BitCount.bitCountLong     avgt    5  11.004 ± 0.000  ns/op

Updated patch is below.

--- CUT HERE ---
diff -r 93cc4d7535ce src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Jun 10 12:29:07 2015 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Jun 11 07:28:28 2015 +0000
@@ -7402,6 +7402,42 @@
   ins_pipe(ialu_reg);
 %}
 
+//---------- Population Count Instructions -------------------------------------
+// Bit counts go through a SIMD scratch register: move in, CNT per byte, ADDV to sum, move out.
+
+instruct popCountI(iRegINoSp dst,  iRegIorL2I src, vRegD tmp) %{
+  match(Set dst (PopCountI src));
+  effect(TEMP tmp);  // tmp is scratch; declaring it TEMP lets the register allocator know it is clobbered
+  ins_cost(INSN_COST * 13);
+
+  format %{ "popcnt $dst, $src\t# int, fmovs/cnt/addv/mov via $tmp" %}
+  ins_encode %{
+    __ fmovs($tmp$$FloatRegister, as_Register($src$$reg));          // 32-bit move that zeroes the rest of tmp: src's upper 32 bits are not guaranteed clear (iRegIorL2I) and must not be counted
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);       // per-byte bit counts in the low 8 bytes
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);      // sum the eight byte counts into lane 0
+    __ mov(as_Register($dst$$reg), $tmp$$FloatRegister, __ T1D, 0); // copy the total back to the integer register
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
+// Note: Long.bitCount(long) returns an int, hence the int dst paired with a long src.
+instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
+  match(Set dst (PopCountL src));
+  effect(TEMP tmp);  // tmp is scratch; declaring it TEMP lets the register allocator know it is clobbered
+  ins_cost(INSN_COST * 13);
+
+  format %{ "popcnt $dst, $src\t# long, mov/cnt/addv/mov via $tmp" %}
+  ins_encode %{
+    __ mov($tmp$$FloatRegister, __ T1D, 0, as_Register($src$$reg)); // all 64 bits of src into lane 0 of tmp
+    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);       // per-byte bit counts in the low 8 bytes
+    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);      // sum the eight byte counts into lane 0
+    __ mov(as_Register($dst$$reg), $tmp$$FloatRegister, __ T1D, 0); // copy the total back to the integer register
+  %}
+
+  ins_pipe(pipe_class_default);
+%}
+
 // ============================================================================
 // MemBar Instruction
 
diff -r 93cc4d7535ce src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Jun 10 12:29:07 2015 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jun 11 07:28:28 2015 +0000
@@ -2050,6 +2050,9 @@
   INSN(negr,  1, 0b100000101110);
   INSN(notr,  1, 0b100000010110);
   INSN(addv,  0, 0b110001101110);
+  INSN(cls,   0, 0b100000010010);  // CLS (vector): count leading sign bits; not referenced by the popCount instructs
+  INSN(clz,   1, 0b100000010010);  // CLZ (vector): count leading zero bits; same opcode field as cls, differs in the second macro argument
+  INSN(cnt,   0, 0b100000010110);  // CNT (vector): per-byte population count; used by popCountI/popCountL
 
 #undef INSN
 
diff -r 93cc4d7535ce src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Jun 10 12:29:07 2015 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jun 11 07:28:28 2015 +0000
@@ -36,6 +36,7 @@
 class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
 
+ public:
   using Assembler::mov;
   using Assembler::movi;
--- CUT HERE ---

Regards,
Alexander


More information about the aarch64-port-dev mailing list