[aarch64-port-dev ] System.arraycopy() intrinsics
Andrew Haley
aph at redhat.com
Thu Sep 19 09:01:31 PDT 2013
We've been missing support for System.arraycopy intrinsics, so the
port has been falling back to native runtime calls. This is a fairly
complete set: the only thing missing is checkcast_copy. I have written
it, but it isn't fully tested yet, so it's not included here.
I haven't included any of the intrinsics that are only used by C2
because I don't have any way to test them right now.
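
Not part of the patch, but for anyone who wants to poke at the new stubs
from Java, a minimal smoke test along these lines (the class name is just
illustrative) should drive the byte, int, long and oop copies through both
the disjoint and overlapping paths once C1 has compiled the loop:

// Minimal sketch of a smoke test, not part of the patch.  The warm-up
// loop is only there so the calling method gets compiled and the stubs,
// rather than the interpreter's runtime path, end up doing the work.
public class ArrayCopySmoke {
    public static void main(String[] args) {
        byte[] bytes = new byte[1024];
        int[] ints = new int[1024];
        long[] longs = new long[1024];
        Object[] oops = new Object[1024];
        for (int k = 0; k < bytes.length; k++) {
            bytes[k] = (byte) k;
            ints[k] = k;
            longs[k] = k;
            oops[k] = Integer.valueOf(k);
        }

        for (int iter = 0; iter < 100000; iter++) {
            // Disjoint copies: source and destination are different arrays.
            System.arraycopy(bytes, 0, new byte[1024], 0, 1024);
            System.arraycopy(ints, 0, new int[1024], 0, 1024);
            System.arraycopy(longs, 0, new long[1024], 0, 1024);
            System.arraycopy(oops, 0, new Object[1024], 0, 1024);
        }

        // Conjoint (overlapping) copy: shift right by two within one array.
        int[] j = {0, 1, 2, 3, 4, 5, 6, 7};
        System.arraycopy(j, 0, j, 2, 6);   // expect {0, 1, 0, 1, 2, 3, 4, 5}
        if (j[2] != 0 || j[7] != 5)
            throw new RuntimeException("overlapping copy went wrong");
    }
}
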
Andrew.
diff -r 423577eb8f6e -r 7c900775ce48 src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Fri Sep 13 18:22:52 2013 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Sep 19 11:18:32 2013 +0100
@@ -613,16 +613,25 @@
#endif
public:
+ enum { instruction_size = 4 };
+
+ Address adjust(Register base, int offset, bool preIncrement) {
+ if (preIncrement)
+ return Address(Pre(base, offset));
+ else
+ return Address(Post(base, offset));
+ }
+
Address pre(Register base, int offset) {
- return Address(Pre(base, offset));
+ return adjust(base, offset, true);
}
Address post (Register base, int offset) {
- return Address(Post(base, offset));
+ return adjust(base, offset, false);
}
Instruction_aarch64* current;
-public:
+
void set_current(Instruction_aarch64* i) { current = i; }
void f(unsigned val, int msb, int lsb) {
diff -r 423577eb8f6e -r 7c900775ce48 src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Fri Sep 13 18:22:52 2013 +0100
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp Thu Sep 19 11:18:32 2013 +0100
@@ -2177,7 +2177,8 @@
if (basic_type == T_ARRAY) basic_type = T_OBJECT;
// if we don't know anything, just go through the generic arraycopy
- if (default_type == NULL || (basic_type == T_OBJECT && UseCompressedOops)) {
+ if (default_type == NULL // || basic_type == T_OBJECT
+ ) {
Label done;
assert(src == r1 && src_pos == r2, "mismatch in calling convention");
@@ -2488,7 +2489,13 @@
bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
const char *name;
address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
- __ call_VM_leaf(entry, 3);
+
+ CodeBlob *cb = CodeCache::find_blob(entry);
+ if (cb) {
+ __ bl(RuntimeAddress(entry));
+ } else {
+ __ call_VM_leaf(entry, 3);
+ }
__ bind(*stub->continuation());
}
diff -r 423577eb8f6e -r 7c900775ce48 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Fri Sep 13 18:22:52 2013 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp Thu Sep 19 11:18:32 2013 +0100
@@ -1285,12 +1285,13 @@
void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
void tableswitch(Register index, jint lowbound, jint highbound,
- Label &jumptable, Label &jumptable_end) {
+ Label &jumptable, Label &jumptable_end, int stride = 1) {
adr(rscratch1, jumptable);
subsw(rscratch2, index, lowbound);
subsw(zr, rscratch2, highbound - lowbound);
br(Assembler::HS, jumptable_end);
- add(rscratch1, rscratch1, rscratch2, ext::sxtw, 2);
+ add(rscratch1, rscratch1, rscratch2,
+ ext::sxtw, exact_log2(stride * Assembler::instruction_size));
br(rscratch1);
}
diff -r 423577eb8f6e -r 7c900775ce48 src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Fri Sep 13 18:22:52 2013 +0100
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp Thu Sep 19 11:18:32 2013 +0100
@@ -49,9 +49,8 @@
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp
+#undef __
#define __ _masm->
-#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8)
-#define a__ ((Assembler*)_masm)->
#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
@@ -826,7 +825,40 @@
//
// Destroy no registers!
//
- void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) { Unimplemented(); }
+ void gen_write_ref_array_pre_barrier(Register addr, Register count, bool dest_uninitialized) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ switch (bs->kind()) {
+ case BarrierSet::G1SATBCT:
+ case BarrierSet::G1SATBCTLogging:
+ // With G1, don't generate the call if we statically know that the target is uninitialized
+ if (!dest_uninitialized) {
+ __ push(0x3fffffff, sp); // integer registers except lr & sp
+ if (count == c_rarg0) {
+ if (addr == c_rarg1) {
+ // exactly backwards!!
+ __ stp(c_rarg0, c_rarg1, __ pre(sp, -2 * wordSize));
+ __ ldp(c_rarg1, c_rarg0, __ post(sp, -2 * wordSize));
+ } else {
+ __ mov(c_rarg1, count);
+ __ mov(c_rarg0, addr);
+ }
+ } else {
+ __ mov(c_rarg0, addr);
+ __ mov(c_rarg1, count);
+ }
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), 2);
+ __ pop(0x3fffffff, sp); // integer registers except lr & sp
+ break;
+ case BarrierSet::CardTableModRef:
+ case BarrierSet::CardTableExtension:
+ case BarrierSet::ModRef:
+ break;
+ default:
+ ShouldNotReachHere();
+
+ }
+ }
+ }
//
// Generate code for an array write post barrier
@@ -838,38 +870,323 @@
//
// The input registers are overwritten.
// The ending address is inclusive.
- void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) { Unimplemented(); }
+ void gen_write_ref_array_post_barrier(Register start, Register end, Register scratch) {
+ assert_different_registers(start, end, scratch);
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ switch (bs->kind()) {
+ case BarrierSet::G1SATBCT:
+ case BarrierSet::G1SATBCTLogging:
+ {
+ __ push(0x3fffffff, sp); // integer registers except lr & sp
+ // must compute element count unless barrier set interface is changed (other platforms supply count)
+ assert_different_registers(start, end, scratch);
+ __ lea(scratch, Address(end, BytesPerHeapOop));
+ __ sub(scratch, scratch, start); // subtract start to get #bytes
+ __ lsr(scratch, scratch, LogBytesPerHeapOop); // convert to element count
+ __ mov(c_rarg0, start);
+ __ mov(c_rarg1, scratch);
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), 2);
+ __ pop(0x3fffffff, sp); // integer registers except lr & sp
+ }
+ break;
+ case BarrierSet::CardTableModRef:
+ case BarrierSet::CardTableExtension:
+ {
+ CardTableModRefBS* ct = (CardTableModRefBS*)bs;
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
- // Copy big chunks forward
+ Label L_loop;
+
+ __ lsr(start, start, CardTableModRefBS::card_shift);
+ __ add(end, end, BytesPerHeapOop);
+ __ lsr(end, end, CardTableModRefBS::card_shift);
+ __ sub(end, end, start); // number of bytes to copy
+
+ const Register count = end; // 'end' register contains bytes count now
+ __ mov(scratch, (address)ct->byte_map_base);
+ __ add(start, start, scratch);
+ __ BIND(L_loop);
+ __ strb(zr, Address(start, count));
+ __ subs(count, count, 1);
+ __ br(Assembler::HI, L_loop);
+ }
+ break;
+ default:
+ ShouldNotReachHere();
+
+ }
+ }
+
+ typedef enum {
+ copy_forwards = 1,
+ copy_backwards = -1
+ } copy_direction;
+
+ void copy_longs_small(Register s, Register d, Register count, copy_direction direction) {
+ Label again, around;
+ __ cbz(count, around);
+ __ bind(again);
+ __ ldr(r16, Address(__ adjust(s, wordSize * direction, direction == copy_backwards)));
+ __ str(r16, Address(__ adjust(d, wordSize * direction, direction == copy_backwards)));
+ __ sub(count, count, 1);
+ __ cbnz(count, again);
+ __ bind(around);
+ }
+
+ void copy_longs(Register s, Register d, Register count, Register tmp, copy_direction direction) {
+ __ andr(tmp, count, 3);
+ copy_longs_small(s, d, tmp, direction);
+ __ andr(count, count, -4);
+
+ Label again, around;
+ __ cbz(count, around);
+ __ push(r18->bit() | r19->bit(), sp);
+ __ bind(again);
+ if (direction != copy_backwards) {
+ __ prfm(Address(s, direction == copy_forwards ? 4 * wordSize : -6 * wordSize));
+ __ prfm(Address(s, direction == copy_forwards ? 6 * wordSize : -8 * wordSize));
+ }
+ __ ldp(r16, r17, Address(__ adjust(s, 2 * wordSize * direction, direction == copy_backwards)));
+ __ ldp(r18, r19, Address(__ adjust(s, 2 * wordSize * direction, direction == copy_backwards)));
+ __ stp(r16, r17, Address(__ adjust(d, 2 * wordSize * direction, direction == copy_backwards)));
+ __ stp(r18, r19, Address(__ adjust(d, 2 * wordSize * direction, direction == copy_backwards)));
+ __ subs(count, count, 4);
+ __ cbnz(count, again);
+ __ pop(r18->bit() | r19->bit(), sp);
+ __ bind(around);
+ }
+
+ void copy_memory_small(Register s, Register d, Register count, Register tmp, int step, Label &done) {
+ // Small copy: less than one word.
+ bool is_backwards = step < 0;
+ int granularity = abs(step);
+
+ __ cbz(count, done);
+ {
+ Label loop;
+ __ bind(loop);
+ switch (granularity) {
+ case 1:
+ __ ldrb(tmp, Address(__ adjust(s, step, is_backwards)));
+ __ strb(tmp, Address(__ adjust(d, step, is_backwards)));
+ break;
+ case 2:
+ __ ldrh(tmp, Address(__ adjust(s, step, is_backwards)));
+ __ strh(tmp, Address(__ adjust(d, step, is_backwards)));
+ break;
+ case 4:
+ __ ldrw(tmp, Address(__ adjust(s, step, is_backwards)));
+ __ strw(tmp, Address(__ adjust(d, step, is_backwards)));
+ break;
+ default:
+ assert(false, "copy_memory called with impossible step");
+ }
+ __ sub(count, count, 1);
+ __ cbnz(count, loop);
+ __ b(done);
+ }
+ }
+
+ // All-singing all-dancing memory copy.
+ //
+ // Copy count units of memory from s to d. The size of a unit is
+ // step, which can be positive or negative depending on the direction
+ // of copy. If is_aligned is false, we align the source address.
+ //
+
+ void copy_memory(bool is_aligned, Register s, Register d,
+ Register count, Register tmp, int step) {
+ copy_direction direction = step < 0 ? copy_backwards : copy_forwards;
+ bool is_backwards = step < 0;
+ int granularity = abs(step);
+
+ if (is_backwards) {
+ __ lea(s, Address(s, count, Address::uxtw(exact_log2(-step))));
+ __ lea(d, Address(d, count, Address::uxtw(exact_log2(-step))));
+ }
+
+ if (granularity == wordSize) {
+ copy_longs(c_rarg0, c_rarg1, c_rarg2, rscratch1, direction);
+ return;
+ }
+
+ Label done, large;
+
+ if (! is_aligned) {
+ __ cmp(count, wordSize/granularity);
+ __ br(Assembler::HS, large);
+ copy_memory_small(s, d, count, tmp, step, done);
+ __ bind(large);
+
+ // Now we've got the small case out of the way we can align the
+ // source address.
+ {
+ Label skip1, skip2, skip4;
+
+ switch (granularity) {
+ case 1:
+ __ tst(s, 1);
+ __ br(Assembler::EQ, skip1);
+ __ ldrb(tmp, Address(__ adjust(s, direction, is_backwards)));
+ __ strb(tmp, Address(__ adjust(d, direction, is_backwards)));
+ __ sub(count, count, 1);
+ __ bind(skip1);
+ // fall through
+ case 2:
+ __ tst(s, 2/granularity);
+ __ br(Assembler::EQ, skip2);
+ __ ldrh(tmp, Address(__ adjust(s, 2 * direction, is_backwards)));
+ __ strh(tmp, Address(__ adjust(d, 2 * direction, is_backwards)));
+ __ sub(count, count, 2/granularity);
+ __ bind(skip2);
+ // fall through
+ case 4:
+ __ tst(s, 4/granularity);
+ __ br(Assembler::EQ, skip4);
+ __ ldrw(tmp, Address(__ adjust(s, 4 * direction, is_backwards)));
+ __ strw(tmp, Address(__ adjust(d, 4 * direction, is_backwards)));
+ __ sub(count, count, 4/granularity);
+ __ bind(skip4);
+ }
+ }
+ }
+
+ // s is now word-aligned.
+
+ // We have a count of units and some trailing bytes. Adjust the
+ // count and do a bulk copy of words.
+ __ lsr(rscratch2, count, exact_log2(wordSize/granularity));
+ __ sub(count, count, rscratch2, Assembler::LSL, exact_log2(wordSize/granularity));
+
+ copy_longs(s, d, rscratch2, rscratch1, direction);
+
+ // And the tail.
+
+ copy_memory_small(s, d, count, tmp, step, done);
+
+ __ bind(done);
+ }
+
+ // Scan over array at d for count oops, verifying each one.
+ // Preserves d and count, clobbers rscratch1 and rscratch2.
+ void verify_oop_array (size_t size, Register d, Register count, Register temp) {
+ Label loop, end;
+ __ mov(rscratch1, d);
+ __ mov(rscratch2, zr);
+ __ bind(loop);
+ __ cmp(rscratch2, count);
+ __ br(Assembler::HS, end);
+ if (size == (size_t)wordSize) {
+ __ ldr(temp, Address(d, rscratch2, Address::uxtw(exact_log2(size))));
+ __ verify_oop(temp);
+ } else {
+ __ ldrw(r16, Address(d, rscratch2, Address::uxtw(exact_log2(size))));
+ __ decode_heap_oop(temp); // calls verify_oop
+ }
+ __ add(rscratch2, rscratch2, size);
+ __ b(loop);
+ __ bind(end);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // is_oop - true => oop array, so generate store check code
+ // name - stub name string
//
// Inputs:
- // end_from - source arrays end address
- // end_to - destination array end address
- // qword_count - 64-bits element count, negative
- // to - scratch
- // L_copy_32_bytes - entry label
- // L_copy_8_bytes - exit label
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
//
- void copy_32_bytes_forward(Register end_from, Register end_to,
- Register qword_count, Register to,
- Label& L_copy_32_bytes, Label& L_copy_8_bytes) { Unimplemented(); }
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+ // the hardware handle it. The two dwords within qwords that span
+ // cache line boundaries will still be loaded and stored atomically.
+ //
+ // Side Effects:
+ // disjoint_int_copy_entry is set to the no-overlap entry point
+ // used by generate_conjoint_int_oop_copy().
+ //
+ address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
+ const char *name, bool dest_uninitialized = false) {
+ Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ if (entry != NULL) {
+ *entry = __ pc();
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Entry:");
+ }
+ __ enter();
+ __ push(r16->bit() | r17->bit(), sp);
+ if (is_oop) {
+ __ push(d->bit() | count->bit(), sp);
+ // no registers are destroyed by this call
+ gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
+ }
+ copy_memory(aligned, s, d, count, rscratch1, size);
+ if (is_oop) {
+ __ pop(d->bit() | count->bit(), sp);
+ if (VerifyOops)
+ verify_oop_array(size, d, count, r16);
+ __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
+ gen_write_ref_array_post_barrier(d, count, rscratch1);
+ }
+ __ pop(r16->bit() | r17->bit(), sp);
+ __ leave();
+ __ ret(lr);
+ return start;
+ }
-
- // Copy big chunks backward
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
+ // ignored
+ // is_oop - true => oop array, so generate store check code
+ // name - stub name string
//
// Inputs:
- // from - source arrays address
- // dest - destination array address
- // qword_count - 64-bits element count
- // to - scratch
- // L_copy_32_bytes - entry label
- // L_copy_8_bytes - exit label
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
//
- void copy_32_bytes_backward(Register from, Register dest,
- Register qword_count, Register to,
- Label& L_copy_32_bytes, Label& L_copy_8_bytes) { Unimplemented(); }
+ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let
+ // the hardware handle it. The two dwords within qwords that span
+ // cache line boundaries will still be loaded and stored atomically.
+ //
+ address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+ address *entry, const char *name,
+ bool dest_uninitialized = false) {
+ Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ __ cmp(d, s);
+ __ br(Assembler::LS, nooverlap_target);
+
+ __ enter();
+ __ push(r16->bit() | r17->bit(), sp);
+ if (is_oop) {
+ __ push(d->bit() | count->bit(), sp);
+ // no registers are destroyed by this call
+ gen_write_ref_array_pre_barrier(d, count, dest_uninitialized);
+ }
+ copy_memory(aligned, s, d, count, rscratch1, -size);
+ if (is_oop) {
+ __ pop(d->bit() | count->bit(), sp);
+ if (VerifyOops)
+ verify_oop_array(size, d, count, r16);
+ __ lea(c_rarg2, Address(c_rarg1, c_rarg2, Address::uxtw(exact_log2(size))));
+ gen_write_ref_array_post_barrier(c_rarg1, c_rarg2, rscratch1);
+ }
+ __ pop(r16->bit() | r17->bit(), sp);
+ __ leave();
+ __ ret(lr);
+
+ return start;
+}
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
@@ -887,10 +1204,19 @@
// and stored atomically.
//
// Side Effects:
+ // disjoint_byte_copy_entry is set to the no-overlap entry point
+ //
+ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries,
+ // we let the hardware handle it. The one to eight bytes within words,
+ // dwords or qwords that span cache line boundaries will still be loaded
+ // and stored atomically.
+ //
+ // Side Effects:
// disjoint_byte_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_byte_copy().
//
- address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) { Unimplemented(); return 0; }
+ address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
+ return generate_disjoint_copy(sizeof (jbyte), aligned, /*is_oop*/false, entry, name);
+ }
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
@@ -908,7 +1234,9 @@
// and stored atomically.
//
address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
- address* entry, const char *name) { Unimplemented(); return 0; }
+ address* entry, const char *name) {
+ return generate_conjoint_copy(sizeof (jbyte), aligned, /*is_oop*/false, nooverlap_target, entry, name);
+ }
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
@@ -929,9 +1257,10 @@
// disjoint_short_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_short_copy().
//
- address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) { Unimplemented(); return 0; }
-
- address generate_fill(BasicType t, bool aligned, const char *name) { Unimplemented(); return 0; }
+ address generate_disjoint_short_copy(bool aligned,
+ address* entry, const char *name) {
+ return generate_disjoint_copy(sizeof (jshort), aligned, /*is_oop*/false, entry, name);
+ }
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
@@ -949,12 +1278,13 @@
// and stored atomically.
//
address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
- address *entry, const char *name) { Unimplemented(); return 0; }
+ address *entry, const char *name) {
+ return generate_conjoint_copy(sizeof (jshort), aligned, /*is_oop*/false, nooverlap_target, entry, name);
+ }
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
- // is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
@@ -970,13 +1300,15 @@
// disjoint_int_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_int_oop_copy().
//
- address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
- const char *name, bool dest_uninitialized = false) { Unimplemented(); return 0; }
+ address generate_disjoint_int_copy(bool aligned, address *entry,
+ const char *name, bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jint), aligned, not_oop, entry, name);
+ }
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
- // is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
@@ -988,42 +1320,93 @@
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomicly.
//
- address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
- address *entry, const char *name,
- bool dest_uninitialized = false) { Unimplemented(); return 0; }
+ address generate_conjoint_int_copy(bool aligned, address nooverlap_target,
+ address *entry, const char *name,
+ bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_conjoint_copy(sizeof (jint), aligned, not_oop, nooverlap_target, entry, name);
+ }
+
// Arguments:
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
// ignored
- // is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
- // c_rarg2 - element count, treated as ssize_t, can be zero
+ // c_rarg2 - element count, treated as size_t, can be zero
//
- // Side Effects:
+ // Side Effects:
// disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
// no-overlap entry point used by generate_conjoint_long_oop_copy().
//
- address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
- const char *name, bool dest_uninitialized = false) { Unimplemented(); return 0; }
+ address generate_disjoint_long_copy(bool aligned, address *entry,
+ const char *name, bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_disjoint_copy(sizeof (jlong), aligned, not_oop, entry, name);
+ }
// Arguments:
// aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
// ignored
- // is_oop - true => oop array, so generate store check code
// name - stub name string
//
// Inputs:
// c_rarg0 - source array address
// c_rarg1 - destination array address
- // c_rarg2 - element count, treated as ssize_t, can be zero
+ // c_rarg2 - element count, treated as size_t, can be zero
//
- address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
- address nooverlap_target, address *entry,
- const char *name, bool dest_uninitialized = false) { Unimplemented(); return 0; }
+ address generate_conjoint_long_copy(bool aligned,
+ address nooverlap_target, address *entry,
+ const char *name, bool dest_uninitialized = false) {
+ const bool not_oop = false;
+ return generate_conjoint_copy(sizeof (jlong), aligned, not_oop, nooverlap_target, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as size_t, can be zero
+ //
+ // Side Effects:
+ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the
+ // no-overlap entry point used by generate_conjoint_long_oop_copy().
+ //
+ address generate_disjoint_oop_copy(bool aligned, address *entry,
+ const char *name, bool dest_uninitialized = false) {
+ const bool is_oop = true;
+ if (UseCompressedOops)
+ return generate_disjoint_copy(sizeof (jint), aligned, is_oop, entry, name);
+ else
+ return generate_disjoint_copy(sizeof (jlong), aligned, is_oop, entry, name);
+ }
+
+ // Arguments:
+ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes
+ // ignored
+ // name - stub name string
+ //
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as size_t, can be zero
+ //
+ address generate_conjoint_oop_copy(bool aligned,
+ address nooverlap_target, address *entry,
+ const char *name, bool dest_uninitialized = false) {
+ const bool is_oop = true;
+ if (UseCompressedOops)
+ return generate_conjoint_copy(sizeof (jint), aligned, is_oop, nooverlap_target, entry, name);
+ else
+ return generate_conjoint_copy(sizeof (jlong), aligned, is_oop, nooverlap_target, entry, name);
+ }
// Helper for generating a dynamic type check.
@@ -1110,60 +1493,91 @@
assert(count == 0, "huh?");
}
+
void generate_arraycopy_stubs() {
- // Call the conjoint generation methods immediately after
- // the disjoint ones so that short branches from the former
- // to the latter can be generated.
-#if 0
- StubRoutines::_jbyte_disjoint_arraycopy = (address) fake_arraycopy_stub;
- StubRoutines::_jbyte_arraycopy = (address) fake_arraycopy_stub;
+ address entry;
+ address entry_jbyte_arraycopy;
+ address entry_jshort_arraycopy;
+ address entry_jint_arraycopy;
+ address entry_oop_arraycopy;
+ address entry_jlong_arraycopy;
+ address entry_checkcast_arraycopy;
- StubRoutines::_jshort_disjoint_arraycopy = (address) fake_arraycopy_stub;
- StubRoutines::_jshort_arraycopy = (address) fake_arraycopy_stub;
+ //*** jbyte
+ // Always need aligned and unaligned versions
+ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry,
+ "jbyte_disjoint_arraycopy");
+ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry,
+ &entry_jbyte_arraycopy,
+ "jbyte_arraycopy");
+ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(true, &entry,
+ "arrayof_jbyte_disjoint_arraycopy");
+ StubRoutines::_arrayof_jbyte_arraycopy = generate_conjoint_byte_copy(true, entry, NULL,
+ "arrayof_jbyte_arraycopy");
- StubRoutines::_jint_disjoint_arraycopy = (address) fake_arraycopy_stub;
- StubRoutines::_jint_arraycopy = (address) fake_arraycopy_stub;
+ //*** jshort
+ // Always need aligned and unaligned versions
+ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry,
+ "jshort_disjoint_arraycopy");
+ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry,
+ &entry_jshort_arraycopy,
+ "jshort_arraycopy");
+ StubRoutines::_arrayof_jshort_disjoint_arraycopy = generate_disjoint_short_copy(true, &entry,
+ "arrayof_jshort_disjoint_arraycopy");
+ StubRoutines::_arrayof_jshort_arraycopy = generate_conjoint_short_copy(true, entry, NULL,
+ "arrayof_jshort_arraycopy");
- StubRoutines::_jlong_disjoint_arraycopy = (address) fake_arraycopy_stub;
- StubRoutines::_jlong_arraycopy = (address) fake_arraycopy_stub;
-#endif
+ //*** jint
+ // Aligned versions
+ StubRoutines::_arrayof_jint_disjoint_arraycopy = generate_disjoint_int_copy(true, &entry,
+ "arrayof_jint_disjoint_arraycopy");
+ StubRoutines::_arrayof_jint_arraycopy = generate_conjoint_int_copy(true, entry, &entry_jint_arraycopy,
+ "arrayof_jint_arraycopy");
+ // In 64 bit we need both aligned and unaligned versions of jint arraycopy.
+ // entry_jint_arraycopy always points to the unaligned version
+ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_copy(false, &entry,
+ "jint_disjoint_arraycopy");
+ StubRoutines::_jint_arraycopy = generate_conjoint_int_copy(false, entry,
+ &entry_jint_arraycopy,
+ "jint_arraycopy");
+
+ //*** jlong
+ // It is always aligned
+ StubRoutines::_arrayof_jlong_disjoint_arraycopy = generate_disjoint_long_copy(true, &entry,
+ "arrayof_jlong_disjoint_arraycopy");
+ StubRoutines::_arrayof_jlong_arraycopy = generate_conjoint_long_copy(true, entry, &entry_jlong_arraycopy,
+ "arrayof_jlong_arraycopy");
+ StubRoutines::_jlong_disjoint_arraycopy = StubRoutines::_arrayof_jlong_disjoint_arraycopy;
+ StubRoutines::_jlong_arraycopy = StubRoutines::_arrayof_jlong_arraycopy;
+
+ //*** oops
+ {
+ // With compressed oops we need unaligned versions; notice that
+ // we overwrite entry_oop_arraycopy.
+ bool aligned = !UseCompressedOops;
+
+ StubRoutines::_arrayof_oop_disjoint_arraycopy
+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy");
+ StubRoutines::_arrayof_oop_arraycopy
+ = generate_conjoint_oop_copy(aligned, entry, &entry_oop_arraycopy, "arrayof_oop_arraycopy");
+ // Aligned versions without pre-barriers
+ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit
+ = generate_disjoint_oop_copy(aligned, &entry, "arrayof_oop_disjoint_arraycopy_uninit",
+ /*dest_uninitialized*/true);
+ StubRoutines::_arrayof_oop_arraycopy_uninit
+ = generate_conjoint_oop_copy(aligned, entry, NULL, "arrayof_oop_arraycopy_uninit",
+ /*dest_uninitialized*/true);
+ }
+
+ StubRoutines::_oop_disjoint_arraycopy = StubRoutines::_arrayof_oop_disjoint_arraycopy;
+ StubRoutines::_oop_arraycopy = StubRoutines::_arrayof_oop_arraycopy;
+ StubRoutines::_oop_disjoint_arraycopy_uninit = StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit;
+ StubRoutines::_oop_arraycopy_uninit = StubRoutines::_arrayof_oop_arraycopy_uninit;
#if 0
- StubRoutines::_oop_disjoint_arraycopy = ShouldNotCallThisStub();
- StubRoutines::_oop_arraycopy = ShouldNotCallThisStub();
-
- StubRoutines::_checkcast_arraycopy = ShouldNotCallThisStub();
- StubRoutines::_unsafe_arraycopy = ShouldNotCallThisStub();
- StubRoutines::_generic_arraycopy = ShouldNotCallThisStub();
-#endif
-
-#if 0
- // We don't generate specialized code for HeapWord-aligned source
- // arrays, so just use the code we've already generated
- StubRoutines::_arrayof_jbyte_disjoint_arraycopy =
- StubRoutines::_jbyte_disjoint_arraycopy;
- StubRoutines::_arrayof_jbyte_arraycopy =
- StubRoutines::_jbyte_arraycopy;
-
- StubRoutines::_arrayof_jshort_disjoint_arraycopy =
- StubRoutines::_jshort_disjoint_arraycopy;
- StubRoutines::_arrayof_jshort_arraycopy =
- StubRoutines::_jshort_arraycopy;
-
- StubRoutines::_arrayof_jint_disjoint_arraycopy =
- StubRoutines::_jint_disjoint_arraycopy;
- StubRoutines::_arrayof_jint_arraycopy =
- StubRoutines::_jint_arraycopy;
-
- StubRoutines::_arrayof_jlong_disjoint_arraycopy =
- StubRoutines::_jlong_disjoint_arraycopy;
- StubRoutines::_arrayof_jlong_arraycopy =
- StubRoutines::_jlong_arraycopy;
-
- StubRoutines::_arrayof_oop_disjoint_arraycopy =
- StubRoutines::_oop_disjoint_arraycopy;
- StubRoutines::_arrayof_oop_arraycopy =
- StubRoutines::_oop_arraycopy;
+ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
+ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
+ /*dest_uninitialized*/true);
#endif
}