[aarch64-port-dev ] Type profiling for interpreter and C1

Andrew Haley aph at redhat.com
Tue Jun 24 12:35:27 UTC 2014


This is the AArch64 version of

http://cr.openjdk.java.net/~roland/8023657/
http://cr.openjdk.java.net/~roland/8026054/

New tests which pass:

Passed: compiler/types/TestSpeculationFailedHigherEqual.java
Passed: compiler/types/TestMeetTopArrayExactConstantArray.java
Passed: compiler/types/TypeSpeculation.java
Passed: compiler/types/correctness/CorrectnessTest.java
Passed: compiler/types/correctness/OffTest.java
Passed: compiler/profiling/TestMethodHandleInvokesIntrinsic.java
Passed: compiler/profiling/TestUnexpectedProfilingMismatch.java
Passed: compiler/profiling/unloadingconflict/TestProfileConflictClassUnloading.java

Andrew.


# HG changeset patch
# User aph
# Date 1403281989 -3600
#      Fri Jun 20 17:33:09 2014 +0100
# Node ID 050fe4f6976ab67316fd82b529b00bc73dbd6360
# Parent  3fbe9bbe91c1741ac00e23b0571cc5e37a5b1c4f
Implement type profiling in C1.

diff -r 3fbe9bbe91c1 -r 050fe4f6976a src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Fri Jun 20 09:21:08 2014 -0400
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Fri Jun 20 17:33:09 2014 +0100
@@ -2678,9 +2678,175 @@
 }

+// Emit code that records the run-time type of op->obj() in the
+// MethodData cell addressed by op->mdp().  The cell is treated as a
+// klass pointer combined with the TypeEntries::null_seen and
+// TypeEntries::type_unknown flag bits.  exact_klass, current_klass,
+// not_null and no_conflict describe what the compiler already knows
+// about this profile point and select which checks are emitted.
+// Clobbers tmp, rscratch1 and rscratch2.  The cell is updated with
+// plain load/modify/store sequences, not atomically.
 void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
-  fatal("Type profiling not implemented on this platform");
+  COMMENT("emit_profile_type {");
+  Register obj = op->obj()->as_register();
+  Register tmp = op->tmp()->as_pointer_register();
+  Address mdo_addr = as_Address(op->mdp()->as_address_ptr());
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label update, next, none;
+
+  bool do_null = !not_null; // obj may be null: emit the null handling
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+  assert(mdo_addr.base() != rscratch1, "wrong register");
+
+  __ verify_oop(obj);
+
+  if (tmp != obj) {
+    __ mov(tmp, obj); // all further work is done on tmp
+  }
+  if (do_null) {
+    __ cbnz(tmp, update);
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      __ ldr(rscratch2, mdo_addr);
+      __ orr(rscratch2, rscratch2, TypeEntries::null_seen);
+      __ str(rscratch2, mdo_addr);
+    }
+    if (do_update) {
+#ifndef ASSERT
+      __ b(next);
+    }
+#else
+      __ b(next);
+    }
+  } else {
+    __ cbnz(tmp, update);
+    __ stop("unexpected null obj");
+#endif
+  }
+
+  __ bind(update);
+
+  if (do_update) {
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      Label ok;
+      __ load_klass(tmp, tmp);
+      __ mov_metadata(rscratch1, exact_klass->constant_encoding());
+      __ eor(rscratch1, tmp, rscratch1);
+      __ cbz(rscratch1, ok);
+      __ stop("exact klass and actual klass differ");
+      __ bind(ok);
+    }
+#endif
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        if (exact_klass != NULL) {
+          __ mov_metadata(tmp, exact_klass->constant_encoding());
+        } else {
+          __ load_klass(tmp, tmp);
+        }
+
+        __ ldr(rscratch2, mdo_addr);
+        __ eor(tmp, tmp, rscratch2); // tmp = klass ^ cell
+        __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+        // klass seen before, nothing to do. The unknown bit may have been
+        // set already but no need to check.
+        __ cbz(rscratch1, next);
+
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ cbz(rscratch2, none);
+          __ cmp(rscratch2, TypeEntries::null_seen);
+          __ br(Assembler::EQ, none);
+          // There is a chance that the checks above fail if another
+          // thread has just set the profiling to this obj's klass:
+          // re-read the cell and compare it against the klass again.
+          __ dmb(Assembler::ISHLD);
+          __ eor(tmp, tmp, rscratch2); // get back original value before XOR
+          __ ldr(rscratch2, mdo_addr);
+          __ eor(tmp, tmp, rscratch2);
+          __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+          __ cbz(rscratch1, next);
+        }
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ ldr(tmp, mdo_addr);
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+      }
+
+      // different than before. Cannot keep accurate profile.
+      __ ldr(rscratch2, mdo_addr);
+      __ orr(rscratch2, rscratch2, TypeEntries::type_unknown);
+      __ str(rscratch2, mdo_addr);
+
+      if (TypeEntries::is_type_none(current_klass)) {
+        __ b(next);
+
+        __ bind(none);
+        // first time here. Set profile type.
+        __ str(tmp, mdo_addr); // tmp == klass ^ (0 or null_seen) here
+      }
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      if (TypeEntries::is_type_none(current_klass)) {
+        __ mov_metadata(tmp, exact_klass->constant_encoding());
+        __ ldr(rscratch2, mdo_addr);
+        __ eor(tmp, tmp, rscratch2); // tmp = klass ^ cell
+        __ andr(rscratch1, tmp, TypeEntries::type_klass_mask);
+        __ cbz(rscratch1, next);
+#ifdef ASSERT
+        {
+          Label ok;
+          __ ldr(rscratch1, mdo_addr);
+          __ cbz(rscratch1, ok);
+          __ cmp(rscratch1, TypeEntries::null_seen);
+          __ br(Assembler::EQ, ok);
+          // may have been set by another thread
+          __ dmb(Assembler::ISHLD);
+          __ mov_metadata(rscratch1, exact_klass->constant_encoding());
+          __ ldr(rscratch2, mdo_addr);
+          __ eor(rscratch2, rscratch1, rscratch2);
+          __ andr(rscratch2, rscratch2, TypeEntries::type_mask);
+          __ cbz(rscratch2, ok);
+
+          __ stop("unexpected profiling mismatch");
+          __ bind(ok);
+        }
+#endif
+        // first time here. Set profile type.
+        __ str(tmp, mdo_addr);
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        __ ldr(tmp, mdo_addr);
+        __ andr(rscratch1, tmp, TypeEntries::type_unknown);
+        __ cbnz(rscratch1, next); // already unknown. Nothing to do anymore.
+
+        __ orr(tmp, tmp, TypeEntries::type_unknown);
+        __ str(tmp, mdo_addr);
+        // FIXME: Write barrier needed here?
+      }
+    }
+
+    __ bind(next);
+  }
+  COMMENT("} emit_profile_type");
 }

+
 void LIR_Assembler::align_backward_branch_target() {
 }

diff -r 3fbe9bbe91c1 -r 050fe4f6976a src/share/vm/c1/c1_LIR.cpp
--- a/src/share/vm/c1/c1_LIR.cpp	Fri Jun 20 09:21:08 2014 -0400
+++ b/src/share/vm/c1/c1_LIR.cpp	Fri Jun 20 17:33:09 2014 +0100
@@ -2114,8 +2114,12 @@

 // LIR_OpProfileType
 void LIR_OpProfileType::print_instr(outputStream* out) const {
-  out->print("exact = "); exact_klass()->print_name_on(out);
-  out->print("current = "); ciTypeEntries::print_ciklass(out, current_klass());
+  out->print("exact = ");
+  if (exact_klass())
+    exact_klass()->print_name_on(out);
+  else
+    out->print("(null)");
+  out->print(" current = "); ciTypeEntries::print_ciklass(out, current_klass());
   mdp()->print(out);          out->print(" ");
   obj()->print(out);          out->print(" ");
   tmp()->print(out);          out->print(" ");
# HG changeset patch
# User aph
# Date 1403546193 -3600
#      Mon Jun 23 18:56:33 2014 +0100
# Node ID 511a29302d283ba14baaaadea3a03fc583f56bf0
# Parent  050fe4f6976ab67316fd82b529b00bc73dbd6360
AArch64 type profiling support

diff -r 050fe4f6976a -r 511a29302d28 src/cpu/aarch64/vm/globals_aarch64.hpp
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Fri Jun 20 17:33:09 2014 +0100
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Mon Jun 23 18:56:33 2014 +0100
@@ -72,7 +72,7 @@
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread

-define_pd_global(uintx, TypeProfileLevel, 0);
+define_pd_global(uintx, TypeProfileLevel, 111);

 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode,          1000);
diff -r 050fe4f6976a -r 511a29302d28 src/cpu/aarch64/vm/interp_masm_aarch64.cpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Fri Jun 20 17:33:09 2014 +0100
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Mon Jun 23 18:56:33 2014 +0100
@@ -1479,3 +1479,208 @@
   restore_locals();
 }

+// Record obj's klass (or that obj is null) in the type profile cell at
+// mdo_addr. Clobbers obj and rscratch1, plus the temporary used by orptr().
+void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) {
+  Label update, next, none;
+
+  verify_oop(obj);
+
+  cbnz(obj, update);
+  orptr(mdo_addr, TypeEntries::null_seen);
+  b(next);
+
+  bind(update);
+  load_klass(obj, obj);
+
+  ldr(rscratch1, mdo_addr);
+  eor(obj, obj, rscratch1); // obj = klass ^ cell
+  tst(obj, TypeEntries::type_klass_mask);
+  br(Assembler::EQ, next); // klass seen before, nothing to do. The unknown
+                           // bit may have been set already; no need to check.
+
+  tst(obj, TypeEntries::type_unknown);
+  br(Assembler::NE, next); // already unknown. Nothing to do anymore.
+
+  cbz(rscratch1, none); // test the value XORed into obj above; do not re-read
+  cmp(rscratch1, TypeEntries::null_seen);
+  br(Assembler::EQ, none);
+  // There is a chance that the checks above fail if another thread has
+  // just set the profiling to this obj's klass: re-read and compare again.
+  eor(obj, obj, rscratch1); // get back original value before XOR
+  ldr(rscratch1, mdo_addr);
+  eor(obj, obj, rscratch1);
+  tst(obj, TypeEntries::type_klass_mask);
+  br(Assembler::EQ, next);
+
+  // different than before. Cannot keep accurate profile.
+  orptr(mdo_addr, TypeEntries::type_unknown);
+  b(next);
+
+  bind(none);
+  // first time here. Set profile type.
+  str(obj, mdo_addr); // obj == klass ^ (0 or null_seen) here
+
+  bind(next);
+}
+
+// Profile the types of the arguments of the call about to be made: for each
+// profiled slot, load the argument and record its type with profile_obj_type().
+// Clobbers mdp, tmp, rscratch1 and whatever profile_obj_type() clobbers.
+void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) {
+  if (!ProfileInterpreter) {
+    return;
+  }
+
+  if (MethodData::profile_arguments() || MethodData::profile_return()) {
+    Label profile_continue;
+
+    test_method_data_pointer(mdp, profile_continue);
+
+    int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size());
+
+    ldrb(rscratch1, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start));
+    cmp(rscratch1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag);
+    br(Assembler::NE, profile_continue); // the data off_to_start bytes back has another tag
+
+    if (MethodData::profile_arguments()) {
+      Label done;
+      int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
+      add(mdp, mdp, off_to_args); // offsets below are relative to the moved mdp
+
+      for (int i = 0; i < TypeProfileArgsLimit; i++) {
+        if (i > 0 || MethodData::profile_return()) {
+          // If return value type is profiled we may have no argument to profile
+          ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
+          sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count());
+          cmp(tmp, TypeStackSlotEntries::per_arg_count());
+          br(Assembler::LT, done); // fewer cells left than one argument needs
+        }
+        ldr(tmp, Address(callee, Method::const_offset()));
+        load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset()));
+        // stack offset o (zero based) from the start of the argument list, for
+        // n arguments translates into offset n - o - 1 from the end of the list
+	ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args));
+        sub(tmp, tmp, rscratch1);
+        sub(tmp, tmp, 1);
+        Address arg_addr = argument_address(tmp);
+        ldr(tmp, arg_addr);
+
+        Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+        profile_obj_type(tmp, mdo_arg_addr);
+
+        int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
+        add(mdp, mdp, to_add);
+        off_to_args += to_add; // keep the mdp-relative offsets in step with mdp
+      }
+
+      if (MethodData::profile_return()) {
+        ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
+        sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
+      }
+
+      bind(done);
+
+      if (MethodData::profile_return()) {
+        // We're right after the type profile for the last argument. tmp is
+        // the number of cells left in the CallTypeData/VirtualCallTypeData to
+        // reach its end. Non null if there's a return to profile.
+        assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
+        add(mdp, mdp, tmp, LSL, exact_log2(DataLayout::cell_size));
+      }
+      str(mdp, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); // save the moved mdp in the frame
+    } else {
+      assert(MethodData::profile_return(), "either profile call args or call ret");
+      update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size()));
+    }
+
+    // mdp points right after the end of the CallTypeData/VirtualCallTypeData,
+    // right after the cells for the return value type if there's one
+
+    bind(profile_continue);
+  }
+}
+
+// Record the type of the value in ret in the ReturnTypeEntry that ends at mdp.
+void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) {
+  assert_different_registers(mdp, ret, tmp, rbcp);
+  if (ProfileInterpreter && MethodData::profile_return()) {
+    Label profile_continue;
+
+    test_method_data_pointer(mdp, profile_continue);
+
+    if (MethodData::profile_return_jsr292_only()) {
+      // If we don't profile all invoke bytecodes we must make sure it's a
+      // bytecode we indeed profile. We can't go back to the beginning of the
+      // ProfileData we intend to update to check its type because we're
+      // right after it and we don't know its length
+      Label do_profile;
+      ldrb(rscratch1, Address(rbcp, 0)); // bytecode at rbcp
+      cmp(rscratch1, Bytecodes::_invokedynamic);
+      br(Assembler::EQ, do_profile);
+      cmp(rscratch1, Bytecodes::_invokehandle);
+      br(Assembler::EQ, do_profile);
+      get_method(tmp);
+      ldrb(rscratch1, Address(tmp, Method::intrinsic_id_offset_in_bytes()));
+      cmp(rscratch1, vmIntrinsics::_compiledLambdaForm);
+      br(Assembler::NE, profile_continue);
+
+      bind(do_profile);
+    }
+
+    Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size()));
+    mov(tmp, ret); // profile_obj_type() overwrites its obj register; keep ret intact
+    profile_obj_type(tmp, mdo_ret_addr);
+
+    bind(profile_continue);
+  }
+}
+
+// Profile the types of the current method's parameters, read from the locals
+// area, last parameter first. Clobbers mdp, tmp1, tmp2, rscratch1, rscratch2.
+void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) {
+  if (ProfileInterpreter && MethodData::profile_parameters()) {
+    Label profile_continue;
+
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Load the offset of the area within the MDO used for parameters. If it's
+    // negative we're not profiling any parameters. Sign-extend the 32-bit slot
+    // (assumed to be an int field - confirm) so the signed compare sees the sign.
+    ldrsw(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())));
+    cmp(tmp1, 0u);
+    br(Assembler::LT, profile_continue);
+
+    // Compute a pointer to the area for parameters from the offset and
+    // index the slot for the last parameter. Collect profiling from the last
+    // parameter down: mdo start + parameters offset + array length - 1
+    add(mdp, mdp, tmp1);
+    ldr(tmp1, Address(mdp, ArrayData::array_len_offset()));
+    sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count());
+
+    Label loop;
+    bind(loop);
+
+    int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0));
+    int type_base = in_bytes(ParametersTypeData::type_offset(0));
+    int per_arg_scale = exact_log2(DataLayout::cell_size);
+    add(rscratch1, mdp, off_base);  // rebuilt on every iteration:
+    add(rscratch2, mdp, type_base); // profile_obj_type() uses scratch registers
+
+    Address arg_off(rscratch1, tmp1, Address::lsl(per_arg_scale));
+    Address arg_type(rscratch2, tmp1, Address::lsl(per_arg_scale));
+
+    ldr(tmp2, arg_off); // load offset on the stack from the slot for this parameter
+    neg(tmp2, tmp2);
+    // read the parameter from the local area
+    ldr(tmp2, Address(rlocals, tmp2, Address::lsl(Interpreter::logStackElementSize)));
+
+    // NOTE(review): arg_type is based on rscratch2; profile_obj_type() must preserve it
+    profile_obj_type(tmp2, arg_type);
+
+    subs(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); // go to next parameter
+    br(Assembler::GE, loop);
+
+    bind(profile_continue);
+  }
+}
diff -r 050fe4f6976a -r 511a29302d28 src/cpu/aarch64/vm/interp_masm_aarch64.hpp
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Fri Jun 20 17:33:09 2014 +0100
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Mon Jun 23 18:56:33 2014 +0100
@@ -263,6 +263,11 @@
   void profile_switch_case(Register index_in_scratch, Register mdp,
                            Register scratch2);

+  void profile_obj_type(Register obj, const Address& mdo_addr);  // record obj's klass, or null, in the cell at mdo_addr; clobbers obj
+  void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual);  // profile the arguments of a call
+  void profile_return_type(Register mdp, Register ret, Register tmp);  // profile the value in ret
+  void profile_parameters_type(Register mdp, Register tmp1, Register tmp2);  // profile the method's parameters
+
   // Debugging
   // only if +VerifyOops && state == atos
   void verify_oop(Register reg, TosState state = atos);
diff -r 050fe4f6976a -r 511a29302d28 src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri Jun 20 17:33:09 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Jun 23 18:56:33 2014 +0100
@@ -1054,6 +1054,15 @@
   void xorptr(Register dst, Address src) { Unimplemented(); }
 #endif

+  void orptr(Address adr, RegisterOrConstant src) { // *adr |= src (load/orr/store, not atomic)
+    ldr(rscratch1, adr); // rscratch1 is the temp: callers address adr via rscratch2
+    if (src.is_register())
+      orr(rscratch1, rscratch1, src.as_register());
+    else
+      orr(rscratch1, rscratch1, src.as_constant());
+    str(rscratch1, adr);
+  }
+
   // Calls

   // void call(Label& L, relocInfo::relocType rtype);
diff -r 050fe4f6976a -r 511a29302d28 src/cpu/aarch64/vm/templateTable_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Fri Jun 20 17:33:09 2014 +0100
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Mon Jun 23 18:56:33 2014 +0100
@@ -3004,6 +3004,7 @@

   // profile this call
   __ profile_final_call(r0);
+  __ profile_arguments_type(r0, method, r4, true);

   __ jump_from_interpreted(method, r0);

@@ -3018,6 +3019,7 @@

   // get target methodOop & entry point
   __ lookup_virtual_method(r0, index, method);
+  __ profile_arguments_type(r3, method, r4, true);
   // FIXME -- this looks completely redundant. is it?
   // __ ldr(r3, Address(method, Method::interpreter_entry_offset()));
   __ jump_from_interpreted(method, r3);
@@ -3048,6 +3050,7 @@
   __ null_check(r2);
   // do the call
   __ profile_call(r0);
+  __ profile_arguments_type(r0, rmethod, rbcp, false);
   __ jump_from_interpreted(rmethod, r0);
 }

@@ -3059,6 +3062,7 @@
   prepare_invoke(byte_no, rmethod);  // get f1 Method*
   // do the call
   __ profile_call(r0);
+  __ profile_arguments_type(r0, rmethod, r4, false);
   __ jump_from_interpreted(rmethod, r0);
 }

@@ -3113,6 +3117,8 @@
   //       method.
   __ cbz(rmethod, no_such_method);

+  __ profile_arguments_type(r3, rmethod, r13, true);
+
   // do the call
   // r2: receiver
   // rmethod,: methodOop
@@ -3162,6 +3168,7 @@
   // r13 is safe to use here as a scratch reg because it is about to
   // be clobbered by jump_from_interpreted().
   __ profile_final_call(r13);
+  __ profile_arguments_type(r13, rmethod, r4, true);

   __ jump_from_interpreted(rmethod, r0);
 }
@@ -3191,6 +3198,7 @@
   // %%% should make a type profile for any invokedynamic that takes a ref argument
   // profile this call
   __ profile_call(rbcp);
+  __ profile_arguments_type(r3, rmethod, r13, false);

   __ verify_oop(r0);



More information about the aarch64-port-dev mailing list