[aarch64-port-dev ] Whether or not to revert the checkcast_arraycopy stub to generate the return value expected by generic C2 code
Andrew Dinn
adinn at redhat.com
Tue Oct 15 08:22:04 PDT 2013
Hmm, yes, but I did borrow the counting up from negative from the Intel
code, and I also ended up adding code to the zero, success, and fail cases
that is merged in your original. Here's a proper rewrite, based more closely
on your code, which avoids a lot of the redundancy in my original
version. It really just adds the save of the original count and the
computation of -1^K (i.e. -1 XOR K, the bitwise complement of the partial
copied count K), so the extra cost is minimal.
regards,
Andrew Dinn
-----------
diff -r 75997cf311bb src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Tue Oct 15 14:16:04 2013 +0100
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Tue Oct 15 16:17:45 2013 +0100
@@ -2223,10 +2223,12 @@
__ ldr(src, Address(sp, 4*BytesPerWord));
if (copyfunc_addr != NULL) {
- __ subw(rscratch1, length, r0); // Number of oops actually copied
+ // r0 is -1^K where K == partial copied count
+ __ eonw(rscratch1, r0, 0);
+ // adjust length down and src/dst pos up by partial copied count
+ __ subw(length, length, rscratch1);
__ addw(src_pos, src_pos, rscratch1);
__ addw(dst_pos, dst_pos, rscratch1);
- __ mov(length, r0); // Number of oops left to copy
}
__ b(*stub->entry());
@@ -2401,10 +2403,12 @@
__ ldp(length, src_pos, Address(sp, 2*BytesPerWord));
__ ldr(src, Address(sp, 4*BytesPerWord));
- __ subw(rscratch1, length, r0); // Number of oops actually copied
+ // return value is -1^K where K is partial copied count
+ __ eonw(rscratch1, r0, zr);
+ // adjust length down and src/dst pos up by partial copied count
+ __ subw(length, length, rscratch1);
__ addw(src_pos, src_pos, rscratch1);
__ addw(dst_pos, dst_pos, rscratch1);
- __ mov(length, r0); // Number of oops left to copy
}
__ b(*stub->entry());
diff -r 75997cf311bb src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Oct 15 14:16:04 2013 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Tue Oct 15 16:17:45 2013 +0100
@@ -1447,7 +1447,8 @@
// c_rarg4 - oop ckval (super_klass)
//
// Output:
- // r0 - count of oops remaining to copy
+ // r0 == 0 - success
+ // r0 == -1^K - failure, where K is partial transfer count
//
address generate_checkcast_copy(const char *name, address *entry,
bool dest_uninitialized = false) {
@@ -1462,6 +1463,7 @@
const Register ckval = c_rarg4; // super_klass
// Registers used as temps (r18, r19, r20 are save-on-entry)
+ const Register count_save = r21; // orig elementscount
const Register start_to = r20; // destination array start address
const Register copied_oop = r18; // actual oop copied
const Register r19_klass = r19; // oop._klass
@@ -1474,7 +1476,7 @@
// checked.
assert_different_registers(from, to, count, ckoff, ckval, start_to,
- copied_oop, r19_klass);
+ copied_oop, r19_klass, count_save);
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
@@ -1498,10 +1500,10 @@
BLOCK_COMMENT("Entry:");
}
- // Empty array: Nothing to do.
+ // Empty array: Nothing to do.
__ cbz(count, L_done);
- __ push(r18->bit() | r19->bit() | r20->bit(), sp);
+ __ push(r18->bit() | r19->bit() | r20->bit() | r21->bit(), sp);
#ifdef ASSERT
BLOCK_COMMENT("assert consistent ckoff/ckval");
@@ -1517,6 +1519,9 @@
}
#endif //ASSERT
+ // save the original count
+ __ mov(count_save, count);
+
// Copy from low to high addresses
__ mov(start_to, to); // Save destination array start address
__ b(L_load_element);
@@ -1546,22 +1551,22 @@
// ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
- // Register r0 = number of *remaining* oops
// Register count = remaining oops, count_save = total oops.
// Emit GC store barriers for the oops we have copied and report
// their number to the caller.
- DEBUG_ONLY(__ nop());
+ __ sub(count, count_save, count); // K = partially copied oop count
+ __ eon(count, count, zr); // report (-1^K) to caller
- // Common exit point (success or failure).
__ BIND(L_do_card_marks);
__ add(to, to, -heapOopSize); // make an inclusive end pointer
gen_write_ref_array_post_barrier(start_to, to, rscratch1);
- __ pop(r18->bit() | r19->bit() | r20->bit(), sp);
+ __ pop(r18->bit() | r19->bit() | r20->bit()| r21->bit(), sp);
inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr);
__ bind(L_done);
- __ mov(r0, count); // report count remaining to caller
+ __ mov(r0, count);
__ leave();
__ ret(lr);
More information about the aarch64-port-dev
mailing list