[aarch64-port-dev ] Fast string comparison

Andrew Haley aph at redhat.com
Fri Jun 27 10:51:21 UTC 2014


String.compareTo(), 4 characters at a time.  I haven't used the SIMD
instructions because of the extra bookkeeping that this requires.

Andrew.


comparing with ssh://hg.openjdk.java.net/aarch64-port/jdk8//hotspot
searching for changes
remote: X11 forwarding request failed on channel 0
changeset:   7183:1d342713037a
tag:         tip
user:        aph
date:        Fri Jun 27 11:25:47 2014 +0100
summary:     Fast string comparison

diff -r 511a29302d28 -r 1d342713037a src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Jun 23 18:56:33 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri Jun 27 11:25:47 2014 +0100
@@ -375,6 +375,15 @@
     R30
 );

+// Singleton class for R0 int register
+reg_class int_r0_reg(R0);
+
+// Singleton class for R2 int register
+reg_class int_r2_reg(R2);
+
+// Singleton class for R4 int register
+reg_class int_r4_reg(R4);
+
 // Class for all long integer registers (including RSP)
 reg_class any_reg(
     R0, R0_H,
@@ -482,11 +491,21 @@
     R0, R0_H
 );

+// Class for 64 bit register r1
+reg_class r1_reg(
+    R1, R1_H
+);
+
 // Class for 64 bit register r2
 reg_class r2_reg(
     R2, R2_H
 );

+// Class for 64 bit register r3
+reg_class r3_reg(
+    R3, R3_H
+);
+
 // Class for 64 bit register r4
 reg_class r4_reg(
     R4, R4_H
@@ -3986,6 +4005,18 @@
   interface(REG_INTER);
 %}

+// Pointer 64 bit Register R1 only
+operand iRegP_R1()
+%{
+  constraint(ALLOC_IN_RC(r1_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Pointer 64 bit Register R2 only
 operand iRegP_R2()
 %{
@@ -3998,6 +4029,18 @@
   interface(REG_INTER);
 %}

+// Pointer 64 bit Register R3 only
+operand iRegP_R3()
+%{
+  constraint(ALLOC_IN_RC(r3_reg));
+  match(RegP);
+  // match(iRegP);
+  match(iRegPNoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Pointer 64 bit Register R4 only
 operand iRegP_R4()
 %{
@@ -4059,7 +4102,7 @@
 // Register R0 only
 operand iRegI_R0()
 %{
-  constraint(ALLOC_IN_RC(r0_reg));
+  constraint(ALLOC_IN_RC(int_r0_reg));
   match(RegI);
   match(iRegINoSp);
   op_cost(0);
@@ -4067,6 +4110,29 @@
   interface(REG_INTER);
 %}

+// Register R2 only
+operand iRegI_R2()
+%{
+  constraint(ALLOC_IN_RC(int_r2_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+// Register R2 only
+operand iRegI_R4()
+%{
+  constraint(ALLOC_IN_RC(int_r4_reg));
+  match(RegI);
+  match(iRegINoSp);
+  op_cost(0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
 // Pointer Register Operands
 // Narrow Pointer Register
 operand iRegN()
@@ -11267,6 +11333,21 @@
   ins_pipe(pipe_class_memory);
 %}

+instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
+                        iRegI_R0 result, iRegP tmp1, rFlagsReg cr)
+%{
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
+%}
+
 // ============================================================================
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
diff -r 511a29302d28 -r 1d342713037a src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Jun 23 18:56:33 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri Jun 27 11:25:47 2014 +0100
@@ -3353,3 +3353,86 @@
   }
 }

+// Compare strings.
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    Register tmp1) {
+  Label LENGTH_DIFF, DONE, SHORT_LOOP, SHORT_STRING,
+    NEXT_WORD, DIFFERENCE;
+
+  BLOCK_COMMENT("string_compare {");
+
+  // Compute the minimum of the string lengths and save the difference.
+  subsw(tmp1, cnt1, cnt2);
+  cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
+
+  // A very short string
+  cmpw(cnt2, 4);
+  br(Assembler::LT, SHORT_STRING);
+
+  // Check if the strings start at the same location.
+  cmp(str1, str2);
+  br(Assembler::EQ, LENGTH_DIFF);
+
+  // Compare longwords
+  {
+    subw(cnt2, cnt2, 4); // The last longword is a special case
+
+    // Move both string pointers to the last longword of their
+    // strings, negate the remaining count, and convert it to bytes.
+    lea(str1, Address(str1, cnt2, Address::uxtw(1)));
+    lea(str2, Address(str2, cnt2, Address::uxtw(1)));
+    sub(cnt2, zr, cnt2, LSL, 1);
+
+    // Loop, loading longwords and comparing them into rscratch2.
+    bind(NEXT_WORD);
+    ldr(result, Address(str1, cnt2));
+    ldr(cnt1, Address(str2, cnt2));
+    adds(cnt2, cnt2, wordSize);
+    eor(rscratch2, result, cnt1);
+    cbnz(rscratch2, DIFFERENCE);
+    br(Assembler::LT, NEXT_WORD);
+
+    // Last longword.  In the case where length == 4 we compare the
+    // same longword twice, but that's still faster than another
+    // conditional branch.
+
+    ldr(result, Address(str1));
+    ldr(cnt1, Address(str2));
+    eor(rscratch2, result, cnt1);
+    cbz(rscratch2, LENGTH_DIFF);
+
+    // Find the first different characters in the longwords and
+    // compute their difference.
+    bind(DIFFERENCE);
+    rev(rscratch2, rscratch2);
+    clz(rscratch2, rscratch2);
+    andr(rscratch2, rscratch2, -16);
+    lsrv(result, result, rscratch2);
+    lsrv(cnt1, cnt1, rscratch2);
+    sub(result, result, cnt1);
+    sxthw(result, result);
+    b(DONE);
+  }
+
+  bind(SHORT_STRING);
+  // Is the minimum length zero?
+  cbz(cnt2, LENGTH_DIFF);
+
+  bind(SHORT_LOOP);
+  load_unsigned_short(result, Address(post(str1, 2)));
+  load_unsigned_short(cnt1, Address(post(str2, 2)));
+  subw(result, result, cnt1);
+  cbnz(result, DONE);
+  sub(cnt2, cnt2, 1);
+  cbnz(cnt2, SHORT_LOOP);
+
+  // Strings are equal up to min length.  Return the length difference.
+  bind(LENGTH_DIFF);
+  mov(result, tmp1);
+
+  // That's it
+  bind(DONE);
+
+  BLOCK_COMMENT("} string_compare");
+}
diff -r 511a29302d28 -r 1d342713037a src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Jun 23 18:56:33 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri Jun 27 11:25:47 2014 +0100
@@ -1290,6 +1290,10 @@
   void update_word_crc32(Register crc, Register v, Register tmp,
         Register table0, Register table1, Register table2, Register table3,
         bool upper = false);
+
+  void string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2, Register result,
+                                    Register tmp1);
 };

 // Used by aarch64.ad to control code generation



More information about the aarch64-port-dev mailing list