[aarch64-port-dev] Implement generate_checkcast_copy()

Andrew Haley aph at redhat.com
Thu Sep 19 10:08:32 PDT 2013


This is the last of the C1 arraycopy intrinsics.
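
A note on the recurring eor -> eon change below: the stubs report -1^K in
r0 on partial failure (K = elements copied), and the C1 glue recovers K by
inverting r0 again.  An all-ones value is not encodable as an AArch64
logical immediate, so eor(tmp, r0, -1) cannot be assembled; eon(tmp, r0, zr)
computes r0 ^ ~0 = ~r0 instead.  A minimal sketch of the return convention
(the helper name is illustrative, not part of the patch):

    #include <cstdint>

    // Decode an arraycopy stub result: 0 means every element was copied,
    // otherwise the value is ~K where K is the partial transfer count.
    static inline intptr_t elements_copied(intptr_t r0, intptr_t length) {
      return r0 == 0 ? length : ~r0;   // ~(-1^K) == K
    }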

Andrew.


# HG changeset patch
# User aph
# Date 1379607403 -3600
# Node ID 9a83c28d309a91ae474b700b34cab633149363fb
# Parent  7f4e0407752a69b3913280236704e5530342cf23
Implement generate_checkcast_copy()

diff -r 7f4e0407752a -r 9a83c28d309a src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Thu Sep 19 17:14:14 2013 +0100
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Thu Sep 19 17:16:43 2013 +0100
@@ -2217,7 +2217,7 @@
     __ cbz(r0, *stub->continuation());

     if (copyfunc_addr != NULL) {
-      __ eor(tmp, r0, -1);
+      __ eon(tmp, r0, zr);
     }

     // Reload values from the stack so they are where the stub
@@ -2354,8 +2354,9 @@
           Address klass_lh_addr(tmp, lh_offset);
           jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
 	  __ ldrw(rscratch1, klass_lh_addr);
-          __ cmpw(rscratch1, objArray_lh);
-          __ br(Assembler::NE, *stub->entry());
+	  __ mov(rscratch2, objArray_lh);
+          __ eorw(rscratch1, rscratch1, rscratch2);
+          __ cbnzw(rscratch1, *stub->entry());
         }

        // Spill because stubs can use any register they like and it's
@@ -2389,7 +2390,7 @@
         }
 #endif

-        __ cbnz(r0, *stub->continuation());
+        __ cbz(r0, *stub->continuation());

 #ifndef PRODUCT
         if (PrintC1Statistics) {
@@ -2397,7 +2398,7 @@
         }
 #endif

-        __ eor(tmp, r0, -1);
+        __ eon(tmp, r0, zr);

         // Restore previously spilled arguments
 	__ ldp(dst,     dst_pos, Address(sp, 0*BytesPerWord));
diff -r 7f4e0407752a -r 9a83c28d309a src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu Sep 19 17:14:14 2013 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu Sep 19 17:16:43 2013 +0100
@@ -51,6 +51,7 @@

 #undef __
 #define __ _masm->
+#define TIMES_OOP Address::sxtw(exact_log2(UseCompressedOops ? 4 : 8))

 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -72,7 +73,12 @@
 #ifdef PRODUCT
 #define inc_counter_np(counter) (0)
 #else
-  void inc_counter_np_(int& counter) { Unimplemented(); }
+  void inc_counter_np_(int& counter) {
+    __ lea(rscratch2, ExternalAddress((address)&counter));
+    __ ldrw(rscratch1, Address(rscratch2));
+    __ addw(rscratch1, rscratch1, 1);
+    __ strw(rscratch1, Address(rscratch2));
+  }
 #define inc_counter_np(counter) \
   BLOCK_COMMENT("inc_counter " #counter); \
   inc_counter_np_(counter);
@@ -796,10 +802,10 @@
   //     c_rarg2 - element count
   //
   //  Output:
-  //     rax   - &from[element count - 1]
+  //     r0   - &from[element count - 1]
   //
   void array_overlap_test(address no_overlap_target, int sf) { Unimplemented(); }
-  void array_overlap_test(Label& L_no_overlap, int sf) { Unimplemented(); }
+  void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }
   void array_overlap_test(address no_overlap_target, Label* NOLp, int sf) { Unimplemented(); }

   // Shuffle first three arg regs on Windows into Linux/Solaris locations.
@@ -1068,20 +1074,20 @@
     __ bind(done);
   }

-  // Scan over array at d for count oops, verifying each one.
-  // Preserves d and count, clobbers rscratch1 and rscratch2.
-  void verify_oop_array (size_t size, Register d, Register count, Register temp) {
+  // Scan over array at a for count oops, verifying each one.
+  // Preserves a and count, clobbers rscratch1 and rscratch2.
+  void verify_oop_array (size_t size, Register a, Register count, Register temp) {
     Label loop, end;
-    __ mov(rscratch1, d);
+    __ mov(rscratch1, a);
     __ mov(rscratch2, zr);
     __ bind(loop);
     __ cmp(rscratch2, count);
     __ br(Assembler::HS, end);
     if (size == (size_t)wordSize) {
-      __ ldr(temp, Address(d, rscratch2, Address::uxtw(exact_log2(size))));
+      __ ldr(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
       __ verify_oop(temp);
     } else {
-      __ ldrw(r16, Address(d, rscratch2, Address::uxtw(exact_log2(size))));
+      __ ldrw(r16, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
       __ decode_heap_oop(temp); // calls verify_oop
     }
     __ add(rscratch2, rscratch2, size);
@@ -1130,7 +1136,7 @@
     if (is_oop) {
       __ pop(d->bit() | count->bit(), sp);
       if (VerifyOops)
-	verify_oop_array(size, s, d, count, r16);
+	verify_oop_array(size, d, count, r16);
       __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
       gen_write_ref_array_post_barrier(d, count, rscratch1);
     }
@@ -1177,7 +1183,7 @@
     if (is_oop) {
       __ pop(d->bit() | count->bit(), sp);
       if (VerifyOops)
-	verify_oop_array(size, s, d, count, r16);
+	verify_oop_array(size, d, count, r16);
       __ lea(c_rarg2, Address(c_rarg1, c_rarg2, Address::uxtw(exact_log2(size))));
       gen_write_ref_array_post_barrier(c_rarg1, c_rarg2, rscratch1);
     }
@@ -1410,11 +1416,24 @@


   // Helper for generating a dynamic type check.
-  // Smashes no registers.
+  // Smashes rscratch1.
   void generate_type_check(Register sub_klass,
                            Register super_check_offset,
                            Register super_klass,
-                           Label& L_success) { Unimplemented(); }
+                           Label& L_success) {
+    assert_different_registers(sub_klass, super_check_offset, super_klass);
+
+    BLOCK_COMMENT("type_check:");
+
+    Label L_miss;
+
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg,        &L_success, &L_miss, NULL,
+                                     super_check_offset);
+    __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
+
+    // Fall through on failure!
+    __ BIND(L_miss);
+  }

   //
   //  Generate checkcasting array copy stub
@@ -1424,17 +1443,148 @@
   //    c_rarg1   - destination array address
   //    c_rarg2   - element count, treated as ssize_t, can be zero
   //    c_rarg3   - size_t ckoff (super_check_offset)
-  // not Win64
   //    c_rarg4   - oop ckval (super_klass)
-  // Win64
-  //    rsp+40    - oop ckval (super_klass)
   //
   //  Output:
-  //    rax ==  0  -  success
-  //    rax == -1^K - failure, where K is partial transfer count
+  //    r0 ==  0  -  success
+  //    r0 == -1^K - failure, where K is partial transfer count
   //
   address generate_checkcast_copy(const char *name, address *entry,
-                                  bool dest_uninitialized = false) { Unimplemented(); return 0; }
+                                  bool dest_uninitialized = false) {
+
+    Label L_load_element, L_store_element, L_do_card_marks, L_done;
+
+    // Input registers (after setup_arg_regs)
+    const Register from        = c_rarg0;   // source array address
+    const Register to          = c_rarg1;   // destination array address
+    const Register length      = c_rarg2;   // elements count
+    const Register ckoff       = c_rarg3;   // super_check_offset
+    const Register ckval       = c_rarg4;   // super_klass
+
+    // Registers used as temps (r16, r17, r18, r19, r20 are save-on-entry)
+    const Register end_from    = from;  // source array end address
+    const Register end_to      = r16;   // destination array end address
+    const Register count       = r20;   // -(count_remaining)
+    const Register r17_length  = r17;   // saved copy of length
+    // End pointers are inclusive, and if length is not zero they point
+    // to the last unit copied:  end_to[0] := end_from[0]
+
+    const Register copied_oop  = r18;    // actual oop copied
+    const Register r19_klass   = r19;    // oop._klass
+
+    //---------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the two arrays are subtypes of Object[] but the
+    // destination array type is not equal to or a supertype
+    // of the source type.  Each element must be separately
+    // checked.
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef ASSERT
+    // caller guarantees that the arrays really are different
+    // otherwise, we would have to make conjoint checks
+    { Label L;
+      array_overlap_test(L, TIMES_OOP);
+      __ stop("checkcast_copy within a single array");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    // Caller of this entry point must set up the argument registers.
+    if (entry != NULL) {
+      *entry = __ pc();
+      BLOCK_COMMENT("Entry:");
+    }
+
+    __ push(r16->bit() | r17->bit() | r18->bit() | r19->bit() | r20->bit(), sp);
+
+#ifdef ASSERT
+    BLOCK_COMMENT("assert consistent ckoff/ckval");
+    // The ckoff and ckval must be mutually consistent,
+    // even though caller generates both.
+    { Label L;
+      int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      __ ldrw(count, Address(ckval, sco_offset));
+      __ cmpw(ckoff, count);
+      __ br(Assembler::EQ, L);
+      __ stop("super_check_offset inconsistent");
+      __ bind(L);
+    }
+#endif //ASSERT
+
+    // Loop-invariant addresses.  They are exclusive end pointers.
+    Address end_from_addr(from, length, TIMES_OOP);
+    Address   end_to_addr(to,   length, TIMES_OOP);
+    // Loop-variant addresses.  They assume post-incremented count < 0.
+    Address from_element_addr(end_from, count, TIMES_OOP);
+    Address   to_element_addr(end_to,   count, TIMES_OOP);
+
+    gen_write_ref_array_pre_barrier(to, count, dest_uninitialized);
+
+    // Copy from low to high addresses, indexed from the end of each array.
+    __ lea(end_from, end_from_addr);
+    __ lea(end_to, end_to_addr);
+    __ mov(r17_length, length);        // save a copy of the length
+    __ neg(count, length);    // negate and test the length
+    __ cbnz(count, L_load_element);
+
+    // Empty array:  Nothing to do.
+    __ mov(r0, zr);                  // return 0 on (trivial) success
+    __ b(L_done);
+
+    // ======== begin loop ========
+    // (Loop is rotated; its entry is L_load_element.)
+    // Loop control:
+    //   for (count = -count; count != 0; count++)
+    // Base pointers src, dst are biased by 8*(count-1), to the last element.
+    __ align(OptoLoopAlignment);
+
+    __ BIND(L_store_element);
+    __ store_heap_oop(to_element_addr, copied_oop);  // store the oop
+    __ add(count, count, 1);               // increment the count toward zero
+    __ cbz(count, L_do_card_marks);
+
+    // ======== loop entry is here ========
+    __ BIND(L_load_element);
+    __ load_heap_oop(copied_oop, from_element_addr); // load the oop
+    __ cbz(copied_oop, L_store_element);
+
+    __ load_klass(r19_klass, copied_oop);// query the object klass
+    generate_type_check(r19_klass, ckoff, ckval, L_store_element);
+    // ======== end loop ========
+
+    // It was a real error; we must depend on the caller to finish the job.
+    // Register count = -1 * number of *remaining* oops, r17_length = *total* oops.
+    // Emit GC store barriers for the oops we have copied and report
+    // their number to the caller.
+    assert_different_registers(r0, r17_length, count, to, end_to, ckoff);
+    __ lea(end_to, to_element_addr);
+    __ add(end_to, end_to, -heapOopSize);      // make an inclusive end pointer
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
+    __ add(r0, r17_length, count);                // K = (original - remaining) oops
+    __ eon(r0, r0, zr);                       // report (-1^K) to caller
+    __ b(L_done);
+
+    // Come here on success only.
+    __ BIND(L_do_card_marks);
+    __ add(end_to, end_to, -heapOopSize);         // make an inclusive end pointer
+    gen_write_ref_array_post_barrier(to, end_to, rscratch1);
+    __ mov(r0, zr);                  // return 0 on success
+
+    // Common exit point (success or failure).
+    __ BIND(L_done);
+    __ pop(r16->bit() | r17->bit() | r18->bit() | r19->bit() | r20->bit(), sp);
+    inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }

   //
   //  Generate 'unsafe' array copy stub
@@ -1574,11 +1724,9 @@
     StubRoutines::_oop_disjoint_arraycopy_uninit     = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
     StubRoutines::_oop_arraycopy_uninit              = StubRoutines::_arrayof_oop_arraycopy_uninit;

-#if 0
     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
                                                                         /*dest_uninitialized*/true);
-#endif
   }

   void generate_math_stubs() { Unimplemented(); }
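
For reference, the emitted checkcast loop is roughly equivalent to the
following C++ sketch (HotSpot-style names; is_subtype_of stands in for the
check_klass_subtype fast/slow paths, and the GC pre/post barriers and card
marks are omitted):

    #include <cstddef>
    #include <cstdint>

    typedef void* oop;
    extern bool is_subtype_of(oop obj, void* super_klass); // stand-in

    intptr_t checkcast_copy(oop* from, oop* to, ptrdiff_t length, void* ckval) {
      oop* end_from = from + length;        // exclusive end pointers
      oop* end_to   = to + length;
      for (ptrdiff_t count = -length; count != 0; count++) {
        oop o = end_from[count];            // load_heap_oop
        if (o != NULL && !is_subtype_of(o, ckval))
          return ~(length + count);         // -1^K: K elements already copied
        end_to[count] = o;                  // store_heap_oop
      }
      return 0;                             // complete success
    }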


