[aarch64-port-dev ] population count intrinsic performance

Alexeev, Alexander Alexander.Alexeev at caviumnetworks.com
Wed Jun 10 18:41:01 UTC 2015


Ed, I removed 'vscratch1' and 'vscratch2' as redundant; the patch now uses v0 and v1 directly.
Patch is below.

Regards,
Alexander

--- CUT HERE ---
diff -r 11af3990d56c src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Thu Jun 04 18:50:05 2015 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Jun 10 18:12:27 2015 +0000
@@ -7402,6 +7402,40 @@
   ins_pipe(ialu_reg);
 %}
 
+//---------- Population Count Instructions -------------------------------------
+// Both rules below do the count in the SIMD unit: src GPR -> v0, cnt into v1, addv back into v0, then v0 -> dst GPR.
+
+instruct popCountI(iRegINoSp dst,  iRegIorL2I src) %{
+  match(Set dst (PopCountI src));
+  ins_cost(INSN_COST * 13);
+
+  format %{ "TODO popCountI\n\t" %}
+  ins_encode %{  // NOTE(review): v0 and v1 are written below, but this rule declares only dst and src and has no effect() clause; confirm no live value can be held in v0/v1
+    __ mov(v0, __ T1D, 0, as_Register($src$$reg));  // src GPR -> element 0 of v0 (T1D); NOTE(review): src is an int-class operand moved as one T1D element; confirm its upper 32 bits are zero, otherwise they would presumably be counted too
+    __ cnt(v1, __ T8B, v0);                         // cnt over the 8B arrangement of v0, result in v1
+    __ addv(v0, __ T8B, v1);                        // addv across the 8B lanes of v1, result in v0 (presumably the total bit count; verify against the ISA description)
+    __ mov(as_Register($dst$$reg), v0, __ T1D, 0);  // element 0 of v0 (T1D) -> dst GPR
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int, which is why dst is iRegINoSp while src is iRegL.
+instruct popCountL(iRegINoSp dst, iRegL src) %{
+  match(Set dst (PopCountL src));
+  ins_cost(INSN_COST * 13);
+
+  format %{ "TODO popCountL\n\t" %}
+  ins_encode %{  // NOTE(review): v0 and v1 are written below, but this rule declares only dst and src and has no effect() clause; confirm no live value can be held in v0/v1
+    __ mov(v0, __ T1D, 0, as_Register($src$$reg));  // src GPR -> element 0 of v0 (T1D)
+    __ cnt(v1, __ T8B, v0);                         // cnt over the 8B arrangement of v0, result in v1
+    __ addv(v0, __ T8B, v1);                        // addv across the 8B lanes of v1, result in v0 (presumably the total bit count; verify against the ISA description)
+    __ mov(as_Register($dst$$reg), v0, __ T1D, 0);  // element 0 of v0 (T1D) -> dst GPR
+  %}
+
+  ins_pipe(ialu_reg);
+%}
+
 // ============================================================================
 // MemBar Instruction
 
diff -r 11af3990d56c src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jun 04 18:50:05 2015 -0700
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Jun 10 18:12:27 2015 +0000
@@ -2050,6 +2050,9 @@
   INSN(negr,  1, 0b100000101110);
   INSN(notr,  1, 0b100000010110);
   INSN(addv,  0, 0b110001101110);
+  INSN(cls,   0, 0b100000010010);
+  INSN(clz,   1, 0b100000010010);
+  INSN(cnt,   0, 0b100000010110);
 
 #undef INSN
 
diff -r 11af3990d56c src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jun 04 18:50:05 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Jun 10 18:12:27 2015 +0000
@@ -36,6 +36,7 @@
 class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
 
+ public:
   using Assembler::mov;
   using Assembler::movi;

--- CUT HERE ---

> -----Original Message-----
> From: Edward Nevill [mailto:edward.nevill at gmail.com]
> Sent: Wednesday, June 10, 2015 5:24 PM
> To: Alexeev, Alexander
> Cc: aarch64-port-dev at openjdk.java.net
> Subject: Re: [aarch64-port-dev ] population count intrinsic performance
> 
> On Wed, 2015-06-10 at 14:06 +0000, Alexeev, Alexander wrote:
> 
> >
> >
> > instruct popCountI(iRegINoSp dst,  iRegIorL2I src) %{
> >   match(Set dst (PopCountI src));
> >   ins_cost(INSN_COST * 13);
> >
> >   format %{ "popCountI TODO\n\t" %}
> >   ins_encode %{
> >       __ mov(vscratch1, __ T1D, 0, as_Register($src$$reg));
> >       __ cnt(vscratch2, __ T8B, vscratch1);
> >       __ addv(vscratch1, __ T8B, vscratch2);
> >       __ mov(as_Register($dst$$reg), vscratch1, __ T1D, 0);
> >   %}
> >
> >   ins_pipe(ialu_reg);
> > %}
> 
> What are 'vscratch1' & 'vscratch2'? Could you send the complete patch so I
> can try this out?
> 
> Thanks,
> Ed.
> 



More information about the aarch64-port-dev mailing list