[aarch64-port-dev ] Whether or not to revert the checkcast_arraycopy stub to generate the return value expected by generic C2 code

Andrew Dinn adinn at redhat.com
Tue Oct 15 08:22:04 PDT 2013


Hmm, yes, but I did borrow the counting up from negative from the Intel
code, and I also ended up adding code to the zero, success and fail cases
that is merged in your original. Here's a proper rewrite based more closely
on your code which avoids a lot of the redundancy in my original
version. It really just adds the save of the original count and the
computation of -1^K (i.e. -1 XOR K, the bitwise complement of the partial
copied count K), so the extra cost is minimal.

regards,


Andrew Dinn
-----------


diff -r 75997cf311bb src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Oct 15 14:16:04 2013 +0100
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Tue Oct 15 16:17:45 2013 +0100
@@ -2223,10 +2223,12 @@
     __ ldr(src,              Address(sp, 4*BytesPerWord));

     if (copyfunc_addr != NULL) {
-      __ subw(rscratch1, length, r0); // Number of oops actually copied
+      // r0 is -1^K where K == partial copied count
+      __ eonw(rscratch1, r0, 0);
+      // adjust length down and src/dst pos up by partial copied count
+      __ subw(length, length, rscratch1);
       __ addw(src_pos, src_pos, rscratch1);
       __ addw(dst_pos, dst_pos, rscratch1);
-      __ mov(length, r0); // Number of oops left to copy
     }
     __ b(*stub->entry());

@@ -2401,10 +2403,12 @@
 	__ ldp(length,  src_pos, Address(sp, 2*BytesPerWord));
 	__ ldr(src,              Address(sp, 4*BytesPerWord));

-	__ subw(rscratch1, length, r0); // Number of oops actually copied
+        // return value is -1^K where K is partial copied count
+        __ eonw(rscratch1, r0, zr);
+        // adjust length down and src/dst pos up by partial copied count
+	__ subw(length, length, rscratch1);
 	__ addw(src_pos, src_pos, rscratch1);
 	__ addw(dst_pos, dst_pos, rscratch1);
-	__ mov(length, r0); // Number of oops left to copy
       }

       __ b(*stub->entry());
diff -r 75997cf311bb src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Oct 15 14:16:04 2013 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Tue Oct 15 16:17:45 2013 +0100
@@ -1447,7 +1447,8 @@
   //    c_rarg4   - oop ckval (super_klass)
   //
   //  Output:
-  //    r0        -  count of oops remaining to copy
+  //    r0 ==  0  -  success
+  //    r0 == -1^K - failure, where K is partial transfer count
   //
   address generate_checkcast_copy(const char *name, address *entry,
                                   bool dest_uninitialized = false) {
@@ -1462,6 +1463,7 @@
     const Register ckval       = c_rarg4;   // super_klass

     // Registers used as temps (r18, r19, r20 are save-on-entry)
+    const Register count_save  = r21;       // original element count
     const Register start_to    = r20;       // destination array start address
     const Register copied_oop  = r18;       // actual oop copied
     const Register r19_klass   = r19;       // oop._klass
@@ -1474,7 +1476,7 @@
     // checked.

     assert_different_registers(from, to, count, ckoff, ckval, start_to,
-			       copied_oop, r19_klass);
+			       copied_oop, r19_klass, count_save);

     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
@@ -1498,10 +1500,10 @@
       BLOCK_COMMENT("Entry:");
     }

-    // Empty array:  Nothing to do.
+     // Empty array:  Nothing to do.
     __ cbz(count, L_done);

-    __ push(r18->bit() | r19->bit() | r20->bit(), sp);
+    __ push(r18->bit() | r19->bit() | r20->bit() | r21->bit(), sp);

 #ifdef ASSERT
     BLOCK_COMMENT("assert consistent ckoff/ckval");
@@ -1517,6 +1519,9 @@
     }
 #endif //ASSERT

+    // save the original count
+    __ mov(count_save, count);
+
     // Copy from low to high addresses
     __ mov(start_to, to);              // Save destination array start address
     __ b(L_load_element);
@@ -1546,22 +1551,22 @@
     // ======== end loop ========

     // It was a real error; we must depend on the caller to finish the job.
-    // Register r0 = number of *remaining* oops
+    // Register count = remaining oops, count_save = total oops.
     // Emit GC store barriers for the oops we have copied and report
     // their number to the caller.

-    DEBUG_ONLY(__ nop());
+    __ sub(count, count_save, count);     // K = partially copied oop count
+    __ eon(count, count, zr);                   // report (-1^K) to caller

-    // Common exit point (success or failure).
     __ BIND(L_do_card_marks);
     __ add(to, to, -heapOopSize);         // make an inclusive end pointer
     gen_write_ref_array_post_barrier(start_to, to, rscratch1);

-    __ pop(r18->bit() | r19->bit() | r20->bit(), sp);
+    __ pop(r18->bit() | r19->bit() | r20->bit()| r21->bit(), sp);
     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);

     __ bind(L_done);
-    __ mov(r0, count);                    // report count remaining to caller
+    __ mov(r0, count);
     __ leave();
     __ ret(lr);



More information about the aarch64-port-dev mailing list