RTC Thumb2 JIT enhancements (diffs part 4)

ed at camswl.com ed at camswl.com
Thu Mar 11 03:25:38 PST 2010


diff -ruNE old/icedtea6/ports/hotspot/src/cpu/zero/vm/thumb2.cpp new2/icedtea6/ports/hotspot/src/cpu/zero/vm/thumb2.cpp
--- old/icedtea6/ports/hotspot/src/cpu/zero/vm/thumb2.cpp	2010-03-10 17:38:48.000000000 +0000
+++ new2/icedtea6/ports/hotspot/src/cpu/zero/vm/thumb2.cpp	2010-03-11 11:06:30.000000000 +0000
@@ -16,35 +16,47 @@
  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
-#undef THUMB2EE
+#undef T2JIT
 #if !defined(DISABLE_THUMB2) && defined(HOTSPOT_ASM) && !defined(SHARK)
-#define THUMB2EE
+#define T2JIT
 #endif
 
-#ifdef THUMB2EE
+#ifdef T2JIT
 
-#define T2EE_PRINT_COMPILATION
-#define T2EE_PRINT_STATISTICS
-//#define T2EE_PRINT_DISASS
-#define T2EE_PRINT_REGUSAGE
+#define CODE_ALIGN 64
+
+#define SLOW_ENTRY_OFFSET 24
+#define FAST_ENTRY_OFFSET 40
+
+#define T2_PRINT_COMPILATION
+#define T2_PRINT_STATISTICS
+//#define T2_PRINT_DISASS
+#define T2_PRINT_REGUSAGE
 
-#ifdef T2EE_PRINT_COMPILATION
-static char *t2ee_print_compilation;
+#ifdef T2_PRINT_COMPILATION
+static char *t2_print_compilation;
 #endif
 
-#ifdef T2EE_PRINT_STATISTICS
-static char *t2ee_print_statistics;
+#ifdef T2_PRINT_STATISTICS
+static char *t2_print_statistics;
 #endif
 
-#ifdef T2EE_PRINT_DISASS
-static char *t2ee_print_disass;
+#ifdef T2_PRINT_DISASS
+static char *t2_print_disass;
 #endif
 
-#ifdef T2EE_PRINT_REGUSAGE
-static char *t2ee_print_regusage;
+#ifdef T2_PRINT_REGUSAGE
+static char *t2_print_regusage;
 #endif
 
+static char *t2_ospace;
+#define OSPACE t2_ospace
+
+#ifdef PRODUCT
 #define THUMB2_CODEBUF_SIZE (8 * 1024 * 1024)
+#else
+#define THUMB2_CODEBUF_SIZE (4 * 1024 * 1024)
+#endif
 #define THUMB2_MAX_BYTECODE_SIZE 10000
 #define THUMB2_MAX_T2CODE_SIZE 65000
 #define THUMB2_MAXLOCALS 1000
@@ -53,7 +65,7 @@
 
 #include "incls/_precompiled.incl"
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
 #include "dis-asm.h"
 #include "bfd.h"
 #endif
@@ -339,21 +351,14 @@
 
 #define H_EXIT_TO_INTERPRETER		28
 
-#define H_GETSTATIC			H_EXIT_TO_INTERPRETER
-#define H_PUTSTATIC			H_EXIT_TO_INTERPRETER
-#define H_JSR				H_EXIT_TO_INTERPRETER
 #define H_RET				H_EXIT_TO_INTERPRETER
-#define H_ZOMBIE			H_EXIT_TO_INTERPRETER
-#define H_MONITOR			H_EXIT_TO_INTERPRETER
+#define H_DEADCODE			H_EXIT_TO_INTERPRETER
 #define H_ATHROW			H_EXIT_TO_INTERPRETER
 
 #define H_HANDLE_EXCEPTION		29
 #define H_ARRAYBOUND			30
-#define H_UNKNOWN			31
 
-#define H_DEBUG_METHODENTRY		32
-#define H_DEBUG_METHODEXIT		33
-#define H_DEBUG_METHODCALL		34
+#define H_LDC_W				31
 
 #define H_INVOKEINTERFACE		35
 #define H_INVOKEVIRTUAL			36
@@ -388,7 +393,15 @@
 
 #define H_HANDLE_EXCEPTION_NO_REGS	60
 
-unsigned handlers[61];
+#define H_INVOKESTATIC_RESOLVED		61
+#define H_INVOKESPECIAL_RESOLVED	62
+#define H_INVOKEVIRTUAL_RESOLVED	63
+#define H_INVOKEVFINAL			64
+
+#define H_MONITORENTER			65
+#define H_MONITOREXIT			66
+
+unsigned handlers[67];
 
 #define LEAF_STACK_SIZE			200
 #define STACK_SPARE			40
@@ -435,7 +448,7 @@
 unsigned stack[1000];
 unsigned r_local[1000];
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
 short start_bci[THUMB2_MAX_T2CODE_SIZE];
 short end_bci[THUMB2_MAX_T2CODE_SIZE];
 #endif
@@ -448,16 +461,13 @@
 
 #include "offsets_arm.s"
 
-#define BC_FLAGS_MASK		0xfc000000
-#define BC_VISITED_P1		0x80000000
+#define BC_FLAGS_MASK		0xf0000000
+#define BC_VISITED		0x80000000
 #define BC_BRANCH_TARGET	0x40000000
 #define BC_COMPILED		0x20000000
-#define BC_VISITED_P2		0x10000000
-#define BC_ZOMBIE		0x08000000
-#define BC_BACK_TARGET		0x04000000
+#define BC_BACK_TARGET		0x10000000
 
-#define IS_DEAD(x)	(((x) & BC_VISITED_P1) == 0)
-#define IS_ZOMBIE(x)	(((x) & BC_ZOMBIE) || ((x) & BC_VISITED_P2) == 0)
+#define IS_DEAD(x)	(((x) & BC_VISITED) == 0)
 
 #define LOCAL_MODIFIED		31
 #define LOCAL_REF		30
@@ -521,22 +531,21 @@
 #define VFP_D6		70
 #define VFP_D7		71
 
-#define PREGS	5
+#define PREGS	6
 
 #define JAZ_V1	ARM_R5
 #define JAZ_V2	ARM_R6
 #define JAZ_V3	ARM_R7
-#define JAZ_V4	ARM_R10
-#define JAZ_V5	ARM_R11
+#define JAZ_V4	ARM_R8
+#define JAZ_V5	ARM_R9
+#define JAZ_V6	ARM_R11
 
 #define Rstack		ARM_R4
 #define Rlocals		ARM_R7
 #define Ristate		ARM_R8
-#define Rthread		ARM_R9
+#define Rthread		ARM_R10
 
-#define Rint_stack	ARM_R4
 #define Rint_jpc	ARM_R5
-#define Rint_istate	ARM_R8
 
 #define IS_ARM_INT_REG(r) ((r) <= ARM_PC)
 #define IS_ARM_FP_REG(r) (!IS_ARM_INT_REG(r))
@@ -563,9 +572,10 @@
     //		.align	CODE_ALIGN
     // slow_entry:				@ callee save interface
     // 		push	{r4, r5, r6, r7, r9, r10, r11, lr}
+    // 		mov	Rthread, r2
     // 		bl	fast_entry
     // 		pop	{r4, r5, r6, r7, r9, r10, r11, pc}
-    unsigned slow_entry[3];
+    unsigned slow_entry[4];
     unsigned *osr_table;			// pointer to the osr table
     unsigned *exception_table;
     Compiled_Method *next;
@@ -573,20 +583,7 @@
     // This is used when receovering from an exception so we can push
     // the register back into the local variables pool.
     short regusage[6];
-    // OSR Entry point:
-    // 	R0 = entry point within compiled method
-    // 	R1 = locals - 4000 * 4
-    // 	R2 = thread
-    // 	R3 = locals - 31 * 4
-    // osr_entry:
-    // 		@ Load each local into it register allocated register
-    // 		ldr	<reg>, [R1, #(4000-<local>) * 4]
-    //    or	ldr	<reg>, [R3, #(31-<local>) * 4]
-    // 		...
-    // 		mov	Rthread, R2
-    // 		bx	R0
-    // 		.align	CODE_ALIGN
-    unsigned osr_entry[1];
+    unsigned header_end[1];
     // fast_entry:
     // 		push	{r8, lr}
     // 		...	@ The compiled code
@@ -606,7 +603,7 @@
 
 typedef struct Thumb2_Entrypoint {
   unsigned compiled_entrypoint;
-  unsigned osr_entry;
+  short *regusage;
 } Thumb2_Entrypoint;
 
 typedef struct CodeBuf {
@@ -640,8 +637,9 @@
     Thumb2_Stack *jstack;
     Thumb2_Registers *jregs;
     unsigned compiled_return;
-    unsigned zombie_bytes;
+    unsigned compiled_word_return[12];  // R0 .. R11
     unsigned is_leaf;
+    unsigned use_istate;
 } Thumb2_Info;
 
 #define IS_INT_SIZE_BASE_TYPE(c) (c=='B' || c=='C' || c=='F' || c=='I' || c=='S' || c=='Z')
@@ -712,7 +710,7 @@
 
 static const char *local_types[] = { "int", "long", "float", "double", "ref" };
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
 void Thumb2_disass(Thumb2_Info *jinfo)
 {
   unsigned code_size = jinfo->code_size;
@@ -729,21 +727,7 @@
   unsigned short *codebuf = jinfo->codebuf->codebuf;
   unsigned idx, compiled_len;
 
-#if 0
-  printf("Local Variable Usage\n");
-  printf("====================\n");
-  for (idx = 0; idx < nlocals; idx++) {
-    unsigned linfo = locals_info[idx];
-    unsigned typ = (linfo >> LOCAL_INT) & 0x1f;
-
-    printf("Local %d, type = %s (%x)", idx, typ ? local_types[LOG2(typ)] : "!!!unknown!!!", typ);
-    if (linfo & (1 << LOCAL_MODIFIED)) printf(", modified");
-    if (idx < (unsigned)jinfo->method->size_of_parameters()) printf(", parameter");
-    putchar('\n');
-  }
-#endif
-
-  init_disassemble_info(&info, stdout, (fprintf_ftype)fprintf);
+  init_disassemble_info(&info, stderr, (fprintf_ftype)fprintf);
   info.arch = bfd_arch_arm;
   disassemble_init_for_target(&info);
   info.endian = BFD_ENDIAN_LITTLE;
@@ -766,10 +750,10 @@
 	int len;
 
 	if (stackinfo & BC_BRANCH_TARGET)
-	  printf("----- Basic Block -----\n");
+	  fprintf(stderr, "----- Basic Block -----\n");
 	JASSERT(bci > last_bci, "disass not advancing");
 	last_bci = bci;
-	printf("%c%4d : ", (stackinfo & BC_VISITED_P1) ? ' ' : '?', bci);
+	fprintf(stderr, "%c%4d : ", (stackinfo & BC_VISITED) ? ' ' : '?', bci);
 	opcode = code_base[bci];
 	if (opcode > OPC_LAST_JAVA_OP) {
 	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
@@ -786,11 +770,11 @@
 	    int def;
 	    unsigned n, i;
 
-	    printf("%02x ", opcode);
+	    fprintf(stderr, "%02x ", opcode);
 	    for (int i = 1; i < 5; i++)
-	      printf("   ");
-	    printf("%s\n", Bytecodes::name((Bytecodes::Code)opcode));
-	    printf("\t%d bytes padding\n", nbci - (bci+1));
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
 	    w = *(unsigned int *)(code_base + nbci + 4);
 	    low = (int)BYTESEX_REVERSE(w);
 	    w = *(unsigned int *)(code_base + nbci + 8);
@@ -798,30 +782,30 @@
 	    w = *(unsigned int *)(code_base + nbci + 0);
 	    def = (int)BYTESEX_REVERSE(w);
 	    table = (unsigned int *)(code_base + nbci + 12);
-	    printf("\tdefault:\t0x%08x\n", def);
-	    printf("\tlow:\t\t0x%08x\n", low);
-	    printf("\thigh:\t\t0x%08x\n", high);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tlow:\t\t0x%08x\n", low);
+	    fprintf(stderr, "\thigh:\t\t0x%08x\n", high);
 	    n = high - low + 1;
 	    while (low <= high) {
 	      int off;
 
 	      w = *table++;
 	      off = (int)BYTESEX_REVERSE(w);
-	      printf("\toffset %d:\t0x%08x\n", low, off);
+	      fprintf(stderr, "\toffset %d:\t0x%08x\n", low, off);
 	      low++;
 	    }
 	    bci += len;
 	    for (i = 0; i < 4; i++) {
-	      printf("0x%08x:\t", (int)codebuf+idx);
+	      fprintf(stderr, "0x%08x:\t", (int)codebuf+idx);
 	      {
 		int len = print_insn_little_arm((bfd_vma)codebuf+idx, &info);
 		if (len == -1) len = 2;
 		idx += len;
-		putchar('\n');
+		fputc('\n', stderr);
 	      }
 	    }
 	    for (i = 0; i < n; i++) {
-	      printf("0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(short *)((int)codebuf + idx));
+	      fprintf(stderr, "0x%08x:\t.short\t0x%04x\n", (int)codebuf+idx, *(short *)((int)codebuf + idx));
 	      idx += 2;
 	    }
 	    nodisass = 1;
@@ -834,19 +818,19 @@
 	    int npairs;	// The Java spec says signed but must be >= 0??
 	    unsigned *table;
 
-	    printf("%02x ", opcode);
+	    fprintf(stderr, "%02x ", opcode);
 	    for (int i = 1; i < 5; i++)
-	      printf("   ");
-	    printf("%s\n", Bytecodes::name((Bytecodes::Code)opcode));
-	    printf("\t%d bytes padding\n", nbci - (bci+1));
+	      fprintf(stderr, "   ");
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)opcode));
+	    fprintf(stderr, "\t%d bytes padding\n", nbci - (bci+1));
 
 	    w = *(unsigned int *)(code_base + nbci + 0);
 	    def = (int)BYTESEX_REVERSE(w);
 	    w = *(unsigned int *)(code_base + nbci + 4);
 	    npairs = (int)BYTESEX_REVERSE(w);
 	    table = (unsigned int *)(code_base + nbci + 8);
-	    printf("\tdefault:\t0x%08x\n", def);
-	    printf("\tnpairs:\t\t0x%08x\n", npairs);
+	    fprintf(stderr, "\tdefault:\t0x%08x\n", def);
+	    fprintf(stderr, "\tnpairs:\t\t0x%08x\n", npairs);
 	    for (int i = 0; i < npairs; i++) {
 	      unsigned match, off;
 	      w = table[0];
@@ -854,7 +838,7 @@
 	      w = table[1];
 	      table += 2;
 	      off = BYTESEX_REVERSE(w);
-	      printf("\t  match: 0x%08x, offset: 0x%08x\n", match, off);
+	      fprintf(stderr, "\t  match: 0x%08x, offset: 0x%08x\n", match, off);
 	    }
 	    break;
 	  }
@@ -862,39 +846,45 @@
 	  default:
 	    for (int i = 0; i < 5; i++) {
 	      if (i < len)
-		printf("%02x ", code_base[bci+i]);
+		fprintf(stderr, "%02x ", code_base[bci+i]);
 	      else
-		printf("   ");
+		fprintf(stderr, "   ");
 	    }
-	    printf("%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
+	    fprintf(stderr, "%s\n", Bytecodes::name((Bytecodes::Code)code_base[bci]));
 	    break;
 	}
 	bci += len;
       }
     }
     if (!nodisass) {
-      printf("0x%08x:\t", (int)codebuf+idx);
-      {
-	int len;
+      fprintf(stderr, "0x%08x:\t", (int)codebuf+idx);
+      int len;
+      if (idx >= offsetof(Compiled_Method, osr_table) && idx < offsetof(Compiled_Method, regusage)) {
+	fprintf(stderr, ".word\t0x%08x", *(unsigned *)((int)codebuf + idx));
+	len = 4;
+      } else if (idx >= offsetof(Compiled_Method, regusage) && idx < offsetof(Compiled_Method, header_end)) {
+	fprintf(stderr, ".short\t0x%04x", *(unsigned short *)((int)codebuf + idx));
+	len = 2;
+      } else {
 	unsigned s1, s2;
 
 	s1 = *(unsigned short *)((int)codebuf + idx);
 	s2 = *(unsigned short *)((int)codebuf + idx + 2);
 	if (s1 == T_UNDEFINED_16 || ((s1 << 16) + s2) == T_UNDEFINED_32) {
 	  if (s1 == T_UNDEFINED_16) {
-	    printf("undefined (0xde00) - UNPATCHED BRANCH???");
+	    fprintf(stderr, "undefined (0xde00) - UNPATCHED BRANCH???");
 	    len = 2;
 	  } else {
-	    printf("undefined (0xf7f0a000) - UNPATCHED BRANCH???");
+	    fprintf(stderr, "undefined (0xf7f0a000) - UNPATCHED BRANCH???");
 	    len = 4;
 	  }
 	} else {
 	  len = print_insn_little_arm((bfd_vma)codebuf+idx, &info);
 	  if (len == -1) len = 2;
-	  idx += len;
 	}
-	putchar('\n');
       }
+      idx += len;
+      fputc('\n', stderr);
     }
   }
 }
@@ -1179,7 +1169,7 @@
 	BCI(0, 0, 0, 1, 0, 0, 0, 0, 0),	// impdep2
 };
 
-void Thumb2_pass1(Thumb2_Info *jinfo, unsigned bci)
+void Thumb2_pass1(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
 {
   unsigned code_size = jinfo->code_size;
   jubyte *code_base = jinfo->code_base;
@@ -1193,387 +1183,11 @@
     unsigned bytecodeinfo;
     unsigned opcode;
 
-    if (stackinfo & BC_VISITED_P1) break;
-    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | BC_VISITED_P1;
-    opcode = code_base[bci];
-//	printf("bci = 0x%04x, opcode = 0x%02x (%s)", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode));
-    bytecodeinfo = bcinfo[opcode];
-    if (!BCI_SPECIAL(bytecodeinfo)) {
-      bci += BCI_LEN(bytecodeinfo);
-      continue;
-    }
-
-    switch (opcode) {
-
-      case opc_goto: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	bci += off;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-      case opc_goto_w: {
-	int off = GET_JAVA_U4(code_base+bci+1);
-	bci += off;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_if_icmpeq:
-      case opc_if_icmpne:
-      case opc_if_icmplt:
-      case opc_if_icmpge:
-      case opc_if_icmpgt:
-      case opc_if_icmple:
-      case opc_if_acmpeq:
-      case opc_if_acmpne:
-      case opc_ifeq:
-      case opc_ifne:
-      case opc_iflt:
-      case opc_ifge:
-      case opc_ifgt:
-      case opc_ifle:
-      case opc_ifnull:
-      case opc_ifnonnull: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	Thumb2_pass1(jinfo, bci + off);
-	bci += 3;
-	break;
-      }
-
-      case opc_jsr: {
-	int off = GET_JAVA_S2(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	Thumb2_pass1(jinfo, bci + off);
-	bci += 3;
-	break;
-      }
-      case opc_jsr_w: {
-	int off = GET_JAVA_U4(code_base+bci+1);
-	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	Thumb2_pass1(jinfo, bci + off);
-	bci += 5;
-	break;
-      }
-
-      case opc_ireturn:
-      case opc_lreturn:
-      case opc_freturn:
-      case opc_dreturn:
-      case opc_areturn:
-      case opc_return:
-      case opc_return_register_finalizer:
-      case opc_ret:
-      case opc_athrow:
-	// The test for BC_VISITED_P1 above will break out of the loop!!!
-	break;
-
-      case opc_tableswitch: {
-	int low, high;
-	unsigned w;
-	unsigned *table;
-	unsigned nbci;
-	int def;
-
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 8);
-	low = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 12);
-	high = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	while (low <= high) {
-	  int off;
-	  w = *table++;
-	  off = (int)BYTESEX_REVERSE(w);
-	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	  Thumb2_pass1(jinfo, bci + off);
-	  low++;
-	}
-
-	bci += def;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_lookupswitch: {
-	unsigned w;
-	unsigned nbci;
-	int def;
-	int npairs;	// The Java spec says signed but must be >= 0??
-	unsigned *table;
-
-	nbci = bci & ~3;
-	w = *(unsigned int *)(code_base + nbci + 4);
-	def = (int)BYTESEX_REVERSE(w);
-	w = *(unsigned int *)(code_base + nbci + 8);
-	npairs = (int)BYTESEX_REVERSE(w);
-	table = (unsigned int *)(code_base + nbci + 16);
-
-	for (int i = 0; i < npairs; i++) {
-	  int off;
-	  w = *table;
-	  table += 2;
-	  off = (int)BYTESEX_REVERSE(w);
-	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
-	  Thumb2_pass1(jinfo, bci + off);
-	}
-
-	bci += def;
-	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
-	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
-	break;
-      }
-
-      case opc_getstatic:
-      case opc_putstatic:
-      case opc_getfield:
-      case opc_putfield: {
-	bci += 3;
-	break;
-      }
-
-      case opc_invokeresolved:
-      case opc_invokespecialresolved:
-      case opc_invokestaticresolved:
-      case opc_invokevfinal:
-      case opc_invokevirtual:
-      case opc_invokespecial:
-      case opc_invokestatic:
-	jinfo->is_leaf = 0;
-	bci += 3;
-	break;
-
-      case opc_invokeinterface:
-	jinfo->is_leaf = 0;
-	bci += 5;
-	break;
-
-      case opc_multianewarray:
-	bci += 4;
-	break;
-
-      case opc_wide:
-	opcode = code_base[bci+1];
-	if (opcode == opc_iinc) {
-	  bci += 6;
-	} else {
-	  bci += 4;
-	}
-	break;
-
-      default:
-	opcode = code_base[bci];
-	fatal1("Undefined opcode %d\n", opcode);
-	break;
-    }
-  }
-}
-
-#ifdef ZOMBIE_DETECTION
-int Thumb2_is_zombie(Thumb2_Info *jinfo, unsigned bci)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned bytecodeinfo;
-  unsigned opcode;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-
-  do {
-    opcode = code_base[bci];
-    // Short circuit exit - commented out because even if it has been executed
-    // we treat throw, jsr, and ret as zombies because they will call out to the
-    // interpreter.
-    // if (opcode > OPC_LAST_JAVA_OP) return 0;
-    bytecodeinfo = bcinfo[opcode];
-    if (!BCI_SPECIAL(bytecodeinfo)) {
-	bci += BCI_LEN(bytecodeinfo);
-#if 0
-	if (opcode >= opc_iload_iload) {
-	  opcode = code_base[bci];
-	  bci += BCI_LEN(bcinfo[opcode]);
-	} else if (BCI_ISLOCAL(bytecodeinfo)) {
-	  if (opcode == opc_iload || (opcode >= opc_iload_0 && opcode <= opc_iload_3)) {
-	    opcode = code_base[bci];
-	    if (opcode == opc_iload || (opcode >= opc_iload_0 && opcode <= opc_iload_3) ||
-					(opcode >= opc_iconst_m1 && opcode <= opc_iconst_5)) {
-		printf("found new zombie at %d\n", bci);
-		return 1;
-	    }
-	  }
-	} else if (opcode == opc_iadd || opcode == opc_isub ||
-		      opcode == opc_iand || opcode == opc_ior || opcode == opc_ixor) {
-	    opcode = code_base[bci];
-	    if (opcode == opc_istore || (opcode >= opc_istore_0 && opcode <= opc_istore_3)) {
-		printf("found new zombie at %d\n", bci);
-		return 1;
-	    }
-	}
-#endif
-    } else {
-      switch (opcode) {
-	case opc_goto:
-	case opc_goto_w:
-	case opc_ifeq:
-	case opc_ifne:
-	case opc_iflt:
-	case opc_ifge:
-	case opc_ifgt:
-	case opc_ifle:
-	case opc_ifnull:
-	case opc_ifnonnull:
-	case opc_if_icmpeq:
-	case opc_if_icmpne:
-	case opc_if_icmplt:
-	case opc_if_icmpge:
-	case opc_if_icmpgt:
-	case opc_if_icmple:
-	case opc_if_acmpeq:
-	case opc_if_acmpne:
-	case opc_tableswitch:
-	case opc_lookupswitch:
-	  return 0;
-	case opc_ireturn:
-	case opc_lreturn:
-	case opc_freturn:
-	case opc_dreturn:
-	case opc_areturn:
-	case opc_return:
-	case opc_return_register_finalizer:
-	    return 0;
-	case opc_jsr:
-	case opc_jsr_w:
-	case opc_ret:
-	case opc_athrow:
-	    return 1;
-	case opc_invokeinterface:
-	case opc_invokevirtual:
-	case opc_invokespecial:
-	case opc_invokestatic:
-	case opc_putfield:
-	case opc_getfield:
-	case opc_putstatic:
-	case opc_getstatic: {
-	  constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-	  ConstantPoolCacheEntry* cache;
-	  int index = GET_NATIVE_U2(code_base+bci+1);
-
-	  cache = cp->entry_at(index);
-	  if (!cache->is_resolved((Bytecodes::Code)opcode)) return 1;
-	  bci += 3;
-	  if (opcode == opc_invokeinterface) bci += 2;
-	  break;
-
-	}
-	case opc_invokeresolved:
-	case opc_invokespecialresolved:
-	case opc_invokestaticresolved:
-	case opc_invokevfinal:
-	  bci += 3;
-	  break;
-
-	case opc_multianewarray:
-	  bci += 4;
-	  break;
-
-	case opc_wide:
-	  opcode = code_base[bci+1];
-	  if (opcode == opc_iinc) {
-	    bci += 6;
-	  } else {
-	    bci += 4;
-	  }
-	  break;
-
-	default:
-	  opcode = code_base[bci];
-	  fatal1("Undefined opcode %d\n", opcode);
-	  break;
-      }
-    }
-    if (bci >= code_size) break;
-  } while (!(bc_stackinfo[bci] & BC_BRANCH_TARGET));
-  return 0;
-}
-#endif // ZOMBIT_DETECTION
-
-void Thumb2_RegAlloc(Thumb2_Info *jinfo)
-{
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned i, j;
-  unsigned linfo;
-  unsigned score, max_score;
-  unsigned local;
-  unsigned nlocals = jinfo->method->max_locals();
-  unsigned *pregs = jinfo->jregs->pregs;
-  unsigned npregs = jinfo->jregs->npregs;
-
-  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1;
-  for (i = 0; i < npregs; i++) {
-    max_score = 0;
-    for (j = 0; j < nlocals; j++) {
-      linfo = locals_info[j];
-
-      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue;
-      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo);
-      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2;
-      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2);
-      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2;
-      if (score > max_score) max_score = score, local = j;
-    }
-    if (max_score < 2) break;
-    locals_info[local] |= 1<<LOCAL_ALLOCATED;
-    jinfo->jregs->r_local[local] = pregs[i];
-    jinfo->jregs->mapping[i] = local;
-  }
-#ifdef T2EE_PRINT_REGUSAGE
-  if (t2ee_print_regusage) {
-    printf("Regalloc: %d physical registers allocated as follows\n", npregs);
-    for (j = 0; j < nlocals; j++) {
-      unsigned r = jinfo->jregs->r_local[j];
-      if (r) {
-	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
-	printf("  ARM Reg R%d -> local %d (type = %s)\n", r, j, local_types[LOG2(typ)]);
-      }
-    }
-  }
-#endif
-}
-
-void Thumb2_pass2(Thumb2_Info *jinfo, unsigned stackdepth, unsigned bci)
-{
-  unsigned code_size = jinfo->code_size;
-  jubyte *code_base = jinfo->code_base;
-  unsigned *bc_stackinfo = jinfo->bc_stackinfo;
-  unsigned *locals_info = jinfo->locals_info;
-  unsigned check_zombie = 0;
-  //constantPoolCacheOop cp = jinfo->method->constants()->cache();
-
-  while (bci < code_size) {
-    unsigned stackinfo = bc_stackinfo[bci];
-    unsigned bytecodeinfo;
-    unsigned opcode;
-
-    if (stackinfo & BC_VISITED_P2) break;
+    if (stackinfo & BC_VISITED) break;
     JASSERT((int)stackdepth >= 0, "stackdepth < 0!!");
-    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED_P2;
-#ifdef ZOMBIE_DETECTION
-    if (check_zombie || (stackinfo & BC_BRANCH_TARGET)) {
-      if (Thumb2_is_zombie(jinfo, bci)) {
-	printf("zombie code at %d\n", bci);
-	bc_stackinfo[bci] |= BC_ZOMBIE;
-	return;
-      }
-      check_zombie = 0;
-    }
-#endif
+    bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | stackdepth | BC_VISITED;
     opcode = code_base[bci];
-//	printf("bci = 0x%04x, opcode = 0x%02x (%s), stackdepth = %d\n", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode), stackdepth);
+//	printf("bci = 0x%04x, opcode = 0x%02x (%s)", bci, opcode,  Bytecodes::name((Bytecodes::Code)opcode));
     bytecodeinfo = bcinfo[opcode];
     if (!BCI_SPECIAL(bytecodeinfo)) {
       if (BCI_ISLOCAL(bytecodeinfo)) {
@@ -1606,12 +1220,20 @@
 
     switch (opcode) {
 
-      case opc_goto:
-	bci += GET_JAVA_S2(code_base+bci+1);
+      case opc_goto: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	bci += off;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
-      case opc_goto_w:
-	bci += GET_JAVA_U4(code_base+bci+1);
+      }
+      case opc_goto_w: {
+	int off = GET_JAVA_U4(code_base+bci+1);
+	bci += off;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (off < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
+      }
 
       case opc_ifeq:
       case opc_ifne:
@@ -1620,12 +1242,14 @@
       case opc_ifgt:
       case opc_ifle:
       case opc_ifnull:
-      case opc_ifnonnull:
+      case opc_ifnonnull: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
 	stackdepth -= 1;
-	Thumb2_pass2(jinfo, stackdepth, bci + GET_JAVA_S2(code_base+bci+1));
-	check_zombie = 1;
+        Thumb2_pass1(jinfo, stackdepth, bci + off);
 	bci += 3;
 	break;
+      }
 
       case opc_if_icmpeq:
       case opc_if_icmpne:
@@ -1634,22 +1258,30 @@
       case opc_if_icmpgt:
       case opc_if_icmple:
       case opc_if_acmpeq:
-      case opc_if_acmpne:
+      case opc_if_acmpne: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
 	stackdepth -= 2;
-	Thumb2_pass2(jinfo, stackdepth, bci + GET_JAVA_S2(code_base+bci+1));
-	check_zombie = 1;
+        Thumb2_pass1(jinfo, stackdepth, bci + off);
 	bci += 3;
 	break;
+      }
 
-      case opc_jsr:
-	Thumb2_pass2(jinfo, stackdepth+1, bci + GET_JAVA_S2(code_base+bci+1));
+      case opc_jsr: {
+	int off = GET_JAVA_S2(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
 	bci += 3;
 	stackdepth = 0;
 	break;
-      case opc_jsr_w:
-	Thumb2_pass2(jinfo, stackdepth+1, bci + GET_JAVA_U4(code_base+bci+1));
+      }
+      case opc_jsr_w: {
+	int off = GET_JAVA_U4(code_base+bci+1);
+	if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+        Thumb2_pass1(jinfo, stackdepth+1, bci + off);
 	bci += 5;
 	break;
+      }
 
       case opc_ireturn:
       case opc_lreturn:
@@ -1660,7 +1292,7 @@
       case opc_return_register_finalizer:
       case opc_ret:
       case opc_athrow:
-	// The test for BC_VISITED_P2 above will break out of the loop!!!
+	// The test for BC_VISITED above will break out of the loop!!!
 	break;
 
       case opc_tableswitch: {
@@ -1684,12 +1316,14 @@
 	  int off;
 	  w = *table++;
 	  off = (int)BYTESEX_REVERSE(w);
-	  Thumb2_pass2(jinfo, stackdepth, bci + off);
+	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	  Thumb2_pass1(jinfo, stackdepth, bci + off);
 	  low++;
 	}
 
-	check_zombie = 1;
 	bci += def;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
       }
 
@@ -1713,11 +1347,13 @@
 	  w = *table;
 	  table += 2;
 	  off = (int)BYTESEX_REVERSE(w);
-	  Thumb2_pass2(jinfo, stackdepth, bci + off);
+	  if (off < 0) bc_stackinfo[bci+off] |= BC_BACK_TARGET;
+	  Thumb2_pass1(jinfo, stackdepth, bci + off);
 	}
 
-	check_zombie = 1;
 	bci += def;
+	bc_stackinfo[bci] |= BC_BRANCH_TARGET;
+	if (def < 0) bc_stackinfo[bci] |= BC_BACK_TARGET;
 	break;
       }
 
@@ -1749,16 +1385,17 @@
       case opc_invokespecialresolved:
       case opc_invokestaticresolved:
       case opc_invokevfinal:
-      case opc_invokeinterface:
       case opc_invokevirtual:
       case opc_invokespecial:
-      case opc_invokestatic: {
+      case opc_invokestatic:
+      case opc_invokeinterface: {
 	int index = GET_JAVA_U2(code_base+bci+1);
 	constantPoolOop pool = jinfo->method->constants();
 	//symbolOop name = pool->name_ref_at(index);
 	symbolOop sig = pool->signature_ref_at(index);
 	jbyte *base = sig->base();
 
+	jinfo->is_leaf = 0;
 	//tty->print("%d: %s: %s\n", opcode, name->as_C_string(), sig->as_C_string());
 	stackdepth += method_stackchange(base);
 	opcode = code_base[bci];
@@ -1774,7 +1411,7 @@
 	bci += 4;
 	break;
 
-      case opc_wide:
+      case opc_wide: {
 	opcode = code_base[bci+1];
 	if (opcode == opc_iinc) {
 	  bci += 6;
@@ -1794,6 +1431,7 @@
 	    fatal1("Undefined wide opcode %d\n", opcode);
 	}
 	break;
+      }
 
       default:
 	opcode = code_base[bci];
@@ -1803,6 +1441,50 @@
   }
 }
 
+void Thumb2_RegAlloc(Thumb2_Info *jinfo)
+{
+  unsigned *locals_info = jinfo->locals_info;
+  unsigned i, j;
+  unsigned linfo;
+  unsigned score, max_score;
+  unsigned local;
+  unsigned nlocals = jinfo->method->max_locals();
+  unsigned *pregs = jinfo->jregs->pregs;
+  unsigned npregs = jinfo->jregs->npregs;
+
+  for (i = 0; i < npregs; i++) jinfo->jregs->mapping[i] = -1;
+  for (i = 0; i < npregs; i++) {
+    if (jinfo->use_istate && pregs[i] == Ristate) continue;
+    max_score = 0;
+    for (j = 0; j < nlocals; j++) {
+      linfo = locals_info[j];
+
+      if (linfo & ((1<<LOCAL_ALLOCATED)|(1<<LOCAL_DOUBLE))) continue;
+      score = LOCAL_READS(linfo) + LOCAL_WRITES(linfo);
+      if (linfo & (1<<LOCAL_MODIFIED)) score = (score+1) >> 2;
+      if (linfo & (1<<LOCAL_REF)) score = score - (score >> 2);
+      if (linfo & (1<<LOCAL_LONG)) score = (score+1) >> 2;
+      if (score > max_score) max_score = score, local = j;
+    }
+    if (max_score < (OSPACE ? 8 : 2)) break;
+    locals_info[local] |= 1<<LOCAL_ALLOCATED;
+    jinfo->jregs->r_local[local] = pregs[i];
+    jinfo->jregs->mapping[i] = local;
+  }
+#ifdef T2_PRINT_REGUSAGE
+  if (t2_print_regusage) {
+    fprintf(stderr, "Regalloc: %d physical registers allocated as follows\n", npregs);
+    for (j = 0; j < nlocals; j++) {
+      unsigned r = jinfo->jregs->r_local[j];
+      if (r) {
+	unsigned typ = (locals_info[j] >> LOCAL_INT) & 0x1f;
+	fprintf(stderr, "  ARM Reg R%d -> local %d (type = %s)\n", r, j, local_types[LOG2(typ)]);
+      }
+    }
+  }
+#endif
+}
+
 //-------------------------------------------------------------------------------------
 
 #define Thumb2		1
@@ -2168,12 +1850,15 @@
   return codebuf->idx * 2;
 }
 
-#define CODE_ALIGN 64
-#define CODE_ALIGN_SIZE 64
-
 u32 out_align(CodeBuf *codebuf, unsigned align)
 {
-  codebuf->idx += (((out_pos(codebuf) + (align-1)) & ~(align-1)) - out_pos(codebuf)) / sizeof(short);
+  while ((out_pos(codebuf) & (align-1)) != 0) out_16(codebuf, 0);
+  return out_pos(codebuf);
+}
+
+u32 out_align_offset(CodeBuf *codebuf, unsigned align, unsigned offset)
+{
+  while ((out_pos(codebuf) & (align-1)) != offset) out_16(codebuf, 0);
   return out_pos(codebuf);
 }
 
@@ -2630,6 +2315,16 @@
 //  return dop_reg(codebuf, DP_MOV, dst, 0, src, SHIFT_LSL, 0);
 }
 
+int nop_16(CodeBuf *codebuf)
+{
+  return out_16(codebuf, T_MOV(ARM_R0, ARM_R0));
+}
+
+int nop_32(CodeBuf *codebuf)
+{
+  return dop_reg(codebuf, DP_MOV, ARM_R8, 0, ARM_R8, SHIFT_LSL, 0);
+}
+
 int mvn_reg(CodeBuf *codebuf, u32 dst, u32 src)
 {
   if (dst < ARM_R8 && src < ARM_R8)
@@ -2838,7 +2533,7 @@
 
 int cmp_imm(CodeBuf *codebuf, Reg src, u32 imm)
 {
-  if (src <= ARM_R8 && imm < 256) return out_16(codebuf, T_CMP_IMM(src, imm));
+  if (src < ARM_R8 && imm < 256) return out_16(codebuf, T_CMP_IMM(src, imm));
   return dop_imm(codebuf, DP_CMP, 0x0f, src, imm);
 }
 
@@ -2847,28 +2542,6 @@
   return dop_imm(codebuf, DP_TST, 0x0f, src, imm);
 }
 
-int hbl(CodeBuf *codebuf, unsigned handler)
-{
-  mov_imm(codebuf, ARM_IP, 0);
-  str_imm(codebuf, ARM_IP, ARM_IP, 0, 1, 0);
-#if 0
-  if ((Thumb2 && ThumbEE))
-    return out_16(codebuf, T_HBL(handler));
-  if (TESTING)
-    return mov_imm(codebuf, ARM_R8, handler);
-  J_Unimplemented();
-#endif
-}
-
-#if 0
-int enter_leave(CodeBuf *codebuf, unsigned enter)
-{
-  if ((Thumb2 && ThumbEE))
-    return out_16x2(codebuf, T_ENTER_LEAVE(enter));
-  J_Unimplemented();
-}
-#endif
-
 int tbh(CodeBuf *codebuf, Reg base, Reg idx)
 {
   out_16x2(codebuf, T_TBH(base, idx));
@@ -3278,58 +2951,6 @@
   ldm(codebuf, regset, Rstack, POP_FD, 1);
 }
 
-#if 0
-int load_multiple(CodeBuf *codebuf, Reg base, Reg *regs, u32 nregs, u32 st, u32 wb)
-{
-  unsigned regset = 0;
-  unsigned regmask;
-  unsigned pre = 0;
-  int dir = 1;
-  unsigned u;
-  Reg r;
-
-  if (st == IB || st == DB) pre = 4;
-  if (st == DA || st == DB) dir = -4;
-  JASSERT(nregs > 0, "nregs must be > 0");
-  if (nregs == 1)
-    return ldr_imm(codebuf, regs[0], base, dir, pre, wb);
-  if (dir > 0) {
-    u = 0;
-    do {
-      r = regs[u];
-      regmask = 1<<r;
-      if (regset != 0 && regmask >= regset) {
-	if (!wb && base != ARM_IP) {
-	  mov_reg(codebuf, ARM_IP, base);
-	  base = ARM_IP;
-	}
-	ldm(codebuf, regset, base, st, 1);
-	regset = 0;
-      }
-      regset |= regmask;
-    } while (++u < nregs);
-    ldm(codebuf, regset, base, st, wb);
-  } else {
-    u = nregs;
-    do {
-      u--;
-      r = regs[u];
-      regmask = 1<<r;
-      if (regmask <= (regset & -regset)) {
-	if (!wb && base != ARM_IP) {
-	  mov_reg(codebuf, ARM_IP, base);
-	  base = ARM_IP;
-	}
-	ldm(codebuf, regset, base, st, 1);
-	regset = 0;
-      }
-      regset |= regmask;
-    } while (u > 0);
-    ldm(codebuf, regset, base, st, wb);
-  }
-}
-#endif
-
 int mov_multiple(CodeBuf *codebuf, Reg *dst, Reg *src, unsigned nregs)
 {
   unsigned u, n, p;
@@ -3792,25 +3413,42 @@
 }
 
 #define LOCAL_OFFSET(local, stackdepth, nlocals) ((stackdepth)*4 + FRAME_SIZE + ((nlocals)-1-(local))*4)
+#define ISTATE_REG(jinfo)	  ((jinfo)->use_istate ? Ristate : Rstack)  // base register for istate accesses: Ristate when use_istate is set, else Rstack
+#define ISTATE(jinfo, stackdepth) ((jinfo)->use_istate ? 0 : (((stackdepth)-(jinfo)->jstack->depth)*4))  // byte displacement to add to ISTATE_REG: 0 for Ristate, else 4 bytes per word of (stackdepth - jstack->depth)
+#define ISTATE_OFFSET(jinfo, stackdepth, offset) (ISTATE(jinfo, stackdepth) + (offset))  // displacement of the istate field at 'offset', relative to ISTATE_REG
 
 void load_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
 {
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, r, Rlocals, -local * 4, 1, 0);
-#else
   int nlocals = jinfo->method->max_locals();
-  ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);
-#endif
+  if (jinfo->use_istate)  // emit a load of local slot 'local' into r; base register depends on use_istate
+    ldr_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4, 1, 0);  // Ristate-relative: same as LOCAL_OFFSET without the stackdepth term
+  else  // no Ristate: address the slot from Rstack
+    ldr_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);  // LOCAL_OFFSET adds stackdepth*4 on top of FRAME_SIZE and the slot index
 }
 
 void store_local(Thumb2_Info *jinfo, Reg r, unsigned local, unsigned stackdepth)
 {
-#ifdef USE_RLOCAL
-  str_imm(jinfo->codebuf, r, Rlocals, -local << 2, 1, 0);
-#else
   int nlocals = jinfo->method->max_locals();
-  str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);
-#endif
+  if (jinfo->use_istate)  // emit a store of r into local slot 'local'; mirrors load_local's addressing choice
+    str_imm(jinfo->codebuf, r, Ristate, FRAME_SIZE + (nlocals-1-local) * 4, 1, 0);  // Ristate-relative: same as LOCAL_OFFSET without the stackdepth term
+  else  // no Ristate: address the slot from Rstack
+    str_imm(jinfo->codebuf, r, Rstack, LOCAL_OFFSET(local, stackdepth, nlocals), 1, 0);  // LOCAL_OFFSET adds stackdepth*4 on top of FRAME_SIZE and the slot index
+}
+
+void load_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)  // emit a load into r of the istate field at istate_offset
+{
+  if (jinfo->use_istate)  // Ristate available: the field offset is used directly and stackdepth is ignored
+    ldr_imm(jinfo->codebuf, r, Ristate, istate_offset, 1, 0);
+  else  // otherwise go through Rstack, with ISTATE_OFFSET adding (stackdepth - jstack->depth)*4 to the field offset
+    ldr_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset), 1, 0);
+}
+
+void store_istate(Thumb2_Info *jinfo, Reg r, unsigned istate_offset, unsigned stackdepth)  // emit a store of r into the istate field at istate_offset
+{
+  if (jinfo->use_istate)  // Ristate available: the field offset is used directly and stackdepth is ignored
+    str_imm(jinfo->codebuf, r, Ristate, istate_offset, 1, 0);
+  else  // otherwise go through Rstack, with ISTATE_OFFSET adding (stackdepth - jstack->depth)*4 to the field offset
+    str_imm(jinfo->codebuf, r, Rstack, ISTATE_OFFSET(jinfo, stackdepth, istate_offset), 1, 0);
 }
 
 void Thumb2_Load(Thumb2_Info *jinfo, int local, unsigned stackdepth)
@@ -3827,7 +3465,6 @@
     Thumb2_Spill(jinfo, 1, 0);
     JASSERT(stackdepth >= jstack->depth, "negative stack offset?");
     stackdepth -= jstack->depth;
-    if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
     r = JSTACK_REG(jstack);
     PUSH(jstack, r);
     load_local(jinfo, r, local, stackdepth);
@@ -3849,7 +3486,6 @@
     } else {
       Thumb2_Spill(jinfo, 1, 0);
       stackdepth -= jstack->depth;
-      if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
       PUSH(jstack, r_hi);
       r_lo = PUSH(jstack, JSTACK_REG(jstack));
       load_local(jinfo, r_lo, local+1, stackdepth);
@@ -3859,14 +3495,12 @@
     if (r_lo) {
       Thumb2_Spill(jinfo, 1, 0);
       stackdepth -= jstack->depth;
-      if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
       r_hi = PUSH(jstack, JSTACK_REG(jstack));
       load_local(jinfo, r_hi, local, stackdepth);
       PUSH(jstack, r_lo);
     } else {
       Thumb2_Spill(jinfo, 2, 0);
       stackdepth -= jstack->depth;
-      if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
       r_hi = PUSH(jstack, JSTACK_REG(jstack));
       r_lo = PUSH(jstack, JSTACK_REG(jstack));
       load_local(jinfo, r_hi, local, stackdepth);
@@ -3883,7 +3517,6 @@
 
   Thumb2_Fill(jinfo, 1);
   stackdepth -= jstack->depth;
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
   r = POP(jstack);
   r_local = jinfo->jregs->r_local[local];
   if (r_local) {
@@ -3902,7 +3535,6 @@
   int nlocals = jinfo->method->max_locals();
 
   Thumb2_Fill(jinfo, 2);
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
   r_lo = POP(jstack);
   r_hi = POP(jstack);
   stackdepth -= 2;
@@ -4236,15 +3868,6 @@
   mov_imm(jinfo->codebuf, ARM_R1, bci);
   mov_imm(jinfo->codebuf, ARM_IP, 0);
   str_imm(jinfo->codebuf, ARM_IP, ARM_IP, 0, 1, 0);
-//  hbl(jinfo->codebuf, handler);
-}
-
-void Thumb2_Debug(Thumb2_Info *jinfo, unsigned handler)
-{
-#if 0
-  Thumb2_Flush(jinfo);
-  bl(jinfo->codebuf, handlers[handler]);
-#endif
 }
 
 void Thumb2_codegen(Thumb2_Info *jinfo, unsigned start);
@@ -4284,20 +3907,88 @@
     return -1;
 }
 
-void Thumb2_Return(Thumb2_Info *jinfo, unsigned opcode)
+void Thumb2_save_locals(Thumb2_Info *jinfo, unsigned stackdepth)  // emit stores writing register-held locals that are both LOCAL_REF and LOCAL_MODIFIED back to their slots
 {
-  Reg r_lo, r;
-  Thumb2_Stack *jstack = jinfo->jstack;
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;  // per-local flag words, tested below with (1 << LOCAL_*) masks
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");  // stackdepth is handed to store_local unadjusted, so the register stack must already be empty
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {  // zero entries are skipped (presumably: local i has no register assigned)
+      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {  // both flags must be set
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
 
-  if (0 /*jinfo->compiled_return*/) {
-    unsigned bci = jinfo->compiled_return;
+void Thumb2_restore_locals(Thumb2_Info *jinfo, unsigned stackdepth)  // emit loads refreshing register-held locals flagged LOCAL_REF from their slots
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;  // per-local flag words, tested below with a (1<<LOCAL_REF) mask
+  int i;
 
-    JASSERT(jinfo->bc_stackinfo[bci] & BC_COMPILED, "return not compiled");
-    JASSERT(jinfo->code_base[bci] == opcode, "type of return changed");
-    branch_uncond(jinfo->codebuf, jinfo->bc_stackinfo[bci] & ~BC_FLAGS_MASK);
-    return;
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");  // stackdepth is handed to load_local unadjusted, so the register stack must already be empty
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {  // zero entries are skipped (presumably: local i has no register assigned)
+      if (locals_info[i] & (1<<LOCAL_REF)) {  // unlike Thumb2_save_locals, LOCAL_MODIFIED is not consulted here
+	load_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_invoke_save(Thumb2_Info *jinfo, unsigned stackdepth)  // emit stores writing every register-held local flagged LOCAL_MODIFIED back to its slot
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;  // per-local flag words, tested below with a (1 << LOCAL_MODIFIED) mask
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");  // stackdepth is handed to store_local unadjusted, so the register stack must already be empty
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {  // zero entries are skipped (presumably: local i has no register assigned)
+      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {  // LOCAL_REF is not required here, unlike Thumb2_save_locals
+	store_local(jinfo, r, i, stackdepth);
+      }
+    }
+  }
+}
+
+void Thumb2_invoke_restore(Thumb2_Info *jinfo, unsigned stackdepth)  // emit loads refreshing every register-held local from its slot
+{
+  int nlocals = jinfo->method->max_locals();
+  unsigned *locals_info = jinfo->locals_info;  // read but not used in this function
+  int i;
+
+  JASSERT(jinfo->jstack->depth == 0, "stack not empty");  // stackdepth is handed to load_local unadjusted, so the register stack must already be empty
+  for (i = 0; i < nlocals; i++) {
+    Reg r = jinfo->jregs->r_local[i];
+    if (r) {  // zero entries are skipped (presumably: local i has no register assigned)
+	load_local(jinfo, r, i, stackdepth);  // unconditional: no LOCAL_REF / LOCAL_MODIFIED filtering
+    }
   }
+}
+
+void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)  // emit a call to handlers[handler] for the bytecode at bci, after flushing state and saving modified locals
+{
+    Thumb2_Flush(jinfo);  // must precede Thumb2_invoke_save, which asserts jstack->depth == 0
+    Thumb2_invoke_save(jinfo, stackdepth);  // write register-held modified locals back to their slots
+    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);  // handler argument in ARM_R0: bci biased by CONSTMETHOD_CODEOFFSET
+    bl(jinfo->codebuf, handlers[handler]);  // NOTE(review): no restore is emitted after the call; whether the handler returns is not visible here
+}
+
+void Thumb2_Return(Thumb2_Info *jinfo, unsigned opcode, unsigned bci, unsigned stackdepth)
+{
+  Reg r_lo, r;
+  Thumb2_Stack *jstack = jinfo->jstack;
 
+  if (jinfo->method->has_monitor_bytecodes()) {
+    Thumb2_Exit(jinfo, H_EXIT_TO_INTERPRETER, bci, stackdepth);
+  }
   if (jinfo->method->is_synchronized()) {
     unsigned loc_success1, loc_success2, loc_failed, loc_retry, loc_exception;
     unsigned loc_illegal_monitor_state;
@@ -4305,7 +3996,7 @@
 //    Thumb2_save_locals(jinfo);
     // Free the monitor
     //
-    // 		sub	r1, Ristate, #8
+    // 		add	r1, <Ristate or Rstack>, #<istate offset>-8
     // 		ldr	r2, [r1, #4]
     //		cbz	r2, throw_illegal_monitor_state
     //		ldr	r0, [r1, #0]
@@ -4326,7 +4017,8 @@
     //
     // JAZ_V1 == tmp2
     // JAZ_V2 == tmp1
-    sub_imm(jinfo->codebuf, ARM_R1, Ristate, frame::interpreter_frame_monitor_size()*wordSize);
+    add_imm(jinfo->codebuf, ARM_R1, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) - frame::interpreter_frame_monitor_size()*wordSize);
+
     ldr_imm(jinfo->codebuf, ARM_R2, ARM_R1, 4, 1, 0);
     loc_illegal_monitor_state = forward_16(jinfo->codebuf);
     ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
@@ -4352,7 +4044,15 @@
     cbz_patch(jinfo->codebuf, ARM_R3, loc_success2);
   }
 
-  if (opcode != opc_return) {
+  if (opcode == opc_return) {
+    if (jinfo->compiled_return) {
+      unsigned ret_idx = jinfo->compiled_return;
+
+      branch_uncond(jinfo->codebuf, ret_idx);
+      return;
+    }
+    if (OSPACE) jinfo->compiled_return = jinfo->codebuf->idx * 2;
+  } else {
     if (opcode == opc_lreturn || opcode == opc_dreturn) {
       Thumb2_Fill(jinfo, 2);
       r_lo = POP(jstack);
@@ -4360,6 +4060,13 @@
     } else {
       Thumb2_Fill(jinfo, 1);
       r = POP(jstack);
+      if (jinfo->compiled_word_return[r]) {
+        unsigned ret_idx = jinfo->compiled_word_return[r];
+
+        branch_uncond(jinfo->codebuf, ret_idx);
+        return;
+      }
+      if (OSPACE) jinfo->compiled_word_return[r] = jinfo->codebuf->idx * 2;
     }
   }
 
@@ -4378,121 +4085,17 @@
       str_imm(jinfo->codebuf, r, Rstack, jinfo->method->max_locals() * sizeof(int), 1, 1);
   }
 
-//  sub_imm(jinfo->codebuf, Ristate, ARM_LR, ISTATE_NEXT_FRAME);
   str_imm(jinfo->codebuf, ARM_LR, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
   str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-  Thumb2_Debug(jinfo, H_DEBUG_METHODEXIT);
-//  enter_leave(jinfo->codebuf, 0);
   ldm(jinfo->codebuf, C_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
 }
 
-#if 0
-void Thumb2_save_all_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-	store_local(jinfo, r, i, stackdepth);
-    }
-  }
-}
-#endif
-
-void Thumb2_save_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if ((locals_info[i] & (1 << LOCAL_REF)) && (locals_info[i] & (1 << LOCAL_MODIFIED))) {
-	store_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-void Thumb2_restore_locals(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if (locals_info[i] & (1<<LOCAL_REF)) {
-	load_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-void Thumb2_invoke_save(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-      if (locals_info[i] & (1 << LOCAL_MODIFIED)) {
-	store_local(jinfo, r, i, stackdepth);
-      }
-    }
-  }
-}
-
-void Thumb2_invoke_restore(Thumb2_Info *jinfo, unsigned stackdepth)
-{
-  int nlocals = jinfo->method->max_locals();
-  unsigned *locals_info = jinfo->locals_info;
-  int i;
-
-  JASSERT(jinfo->jstack->depth == 0, "stack not empty");
-  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-  for (i = 0; i < nlocals; i++) {
-    Reg r = jinfo->jregs->r_local[i];
-    if (r) {
-	load_local(jinfo, r, i, stackdepth);
-    }
-  }
-}
-
-void Thumb2_Exit(Thumb2_Info *jinfo, unsigned handler, unsigned bci, unsigned stackdepth)
-{
-    Thumb2_Flush(jinfo);
-    Thumb2_invoke_save(jinfo, stackdepth);
-    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-    bl(jinfo->codebuf, handlers[handler]);
-}
-
-void Thumb2_Jsr(Thumb2_Info *jinfo, unsigned bci, unsigned stackdepth)
-{
-      Thumb2_Exit(jinfo, H_JSR, bci, stackdepth);
-}
-
 int Thumb2_Accessor(Thumb2_Info *jinfo)
 {
   jubyte *code_base = jinfo->code_base;
   constantPoolCacheOop  cp = jinfo->method->constants()->cache();
   ConstantPoolCacheEntry* cache;
   int index = GET_NATIVE_U2(code_base+2);
-  unsigned loc;
   unsigned *bc_stackinfo = jinfo->bc_stackinfo;
 
   JASSERT(code_base[0] == opc_aload_0 || code_base[0] == opc_iaccess_0, "not an aload_0 in accessor");
@@ -4503,28 +4106,28 @@
   TosState tos_type = cache->flag_state();
   int field_offset = cache->f2();
 
-  // Slow entry point
-  loc = forward_32(jinfo->codebuf);
-  out_32(jinfo->codebuf, 0);
-  out_32(jinfo->codebuf, 0);
+  // Slow entry point - callee save
+  // R0 = method
+  // R2 = thread
+  stm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + FAST_ENTRY_OFFSET - 6);
+  ldm(jinfo->codebuf, (1<<Rthread) + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_16(jinfo->codebuf, 0);
 
   out_32(jinfo->codebuf, 0);	// pointer to osr table
   out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
   out_32(jinfo->codebuf, 0);	// next compiled method
 
-  out_32(jinfo->codebuf, 0);    // regusage
-  out_32(jinfo->codebuf, 0);
-  out_32(jinfo->codebuf, 0);
-
-  // OSR entry point
-  mov_reg(jinfo->codebuf, ARM_PC, ARM_R0);
+  out_32(jinfo->codebuf, -1);    // regusage
+  out_32(jinfo->codebuf, -1);
+  out_32(jinfo->codebuf, -1);
 
   out_align(jinfo->codebuf, CODE_ALIGN);
 
   // fast entry point
   bc_stackinfo[0] = (bc_stackinfo[0] & BC_FLAGS_MASK) | (jinfo->codebuf->idx * 2) | BC_COMPILED;
-  branch_uncond_patch(jinfo->codebuf, loc, jinfo->codebuf->idx * 2);
-  ldr_imm(jinfo->codebuf, ARM_R1, ARM_R2, THREAD_JAVA_SP, 1, 0);
+  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_JAVA_SP, 1, 0);
   ldr_imm(jinfo->codebuf, ARM_R0, ARM_R1, 0, 1, 0);
   if (tos_type == btos)
     ldrsb_imm(jinfo->codebuf, ARM_R0, ARM_R0, field_offset, 1, 0);
@@ -4540,19 +4143,26 @@
   return 1;
 }
 
+#define STACKDEPTH(jinfo, stackinfo) (((stackinfo) & ~BC_FLAGS_MASK) + \
+	((jinfo)->method->is_synchronized() ? frame::interpreter_frame_monitor_size() : 0))  // stackinfo with the BC_FLAGS_MASK bits cleared, plus interpreter_frame_monitor_size() when the method is synchronized
+
+
 void Thumb2_Enter(Thumb2_Info *jinfo)
 {
   int parms = jinfo->method->size_of_parameters();
   int extra_locals = jinfo->method->max_locals() - parms;
   unsigned *locals_info = jinfo->locals_info;
   int i;
+  unsigned stackdepth = 0;
 
   // Slow entry point - callee save
   // R0 = method
   // R2 = thread
   stm(jinfo->codebuf, I_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + CODE_ALIGN - 4);
+  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  bl(jinfo->codebuf, out_pos(jinfo->codebuf) + FAST_ENTRY_OFFSET - 6);
   ldm(jinfo->codebuf, I_REGSET + (1<<ARM_PC), ARM_SP, POP_FD, 1);
+  out_16(jinfo->codebuf, 0);
 
   out_32(jinfo->codebuf, 0);	// Space for osr_table pointer
   out_32(jinfo->codebuf, 0);	// Space for exception_table pointer
@@ -4562,43 +4172,20 @@
   out_32(jinfo->codebuf, 0);
   out_32(jinfo->codebuf, 0);
 
-  // OSR entry point == Slow entry + 16 - caller save
-  // R0 = entry point within compiled method
-  // R1 = locals - THUMB2_MAXLOCALS * 4
-  // R2 = thread
-  // R3 = locals - 31 * 4
-  {
-    int nlocals = jinfo->method->max_locals();
-
-    for (i = 0; i < nlocals; i++) {
-      Reg r = jinfo->jregs->r_local[i];
-      if (r) {
-	ldr_imm(jinfo->codebuf, r,
-		(i < 32) ? ARM_R3 : ARM_R1,
-		(i < 32) ? (31 - i) * 4 : (THUMB2_MAXLOCALS - i) * 4,
-	  	1, 0);
-      }
-    }
-    mov_reg(jinfo->codebuf, Rthread, ARM_R2);
-    mov_reg(jinfo->codebuf, ARM_PC, ARM_R0);
-  }
-
   out_align(jinfo->codebuf, CODE_ALIGN);
 
   // Fast entry point == Slow entry + 64 - caller save
   // R0 = method
   // R2 = thread
   stm(jinfo->codebuf, C_REGSET + (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-//  enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rstack, ARM_R2, THREAD_JAVA_SP, 1, 0);
-  Thumb2_Debug(jinfo, H_DEBUG_METHODENTRY);
+  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
   {
     unsigned stacksize;
 
     stacksize = (extra_locals + jinfo->method->max_stack()) * sizeof(int);
     stacksize += FRAME_SIZE + STACK_SPARE;
     if (!jinfo->is_leaf || stacksize > LEAF_STACK_SIZE) {
-      ldr_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_JAVA_STACK_BASE, 1, 0);
+      ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_STACK_BASE, 1, 0);
       sub_imm(jinfo->codebuf, ARM_R1, Rstack, stacksize + LEAF_STACK_SIZE);
       cmp_reg(jinfo->codebuf, ARM_R3, ARM_R1);
       it(jinfo->codebuf, COND_CS, IT_MASK_T);
@@ -4619,13 +4206,19 @@
 
   ldr_imm(jinfo->codebuf, ARM_IP, ARM_R0, METHOD_CONSTANTS, 1, 0);
 
-  sub_imm(jinfo->codebuf, Ristate, Rstack, FRAME_SIZE);
-
   add_imm(jinfo->codebuf, Rlocals, Rstack, (jinfo->method->max_locals()-1) * sizeof(int));
-  str_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
+
+  sub_imm(jinfo->codebuf, Rstack, Rstack, FRAME_SIZE);
+
+  if (jinfo->use_istate) mov_reg(jinfo->codebuf, Ristate, Rstack);
+
+  store_istate(jinfo, Rstack, ISTATE_MONITOR_BASE, stackdepth);
+
+  store_istate(jinfo, Rlocals, ISTATE_LOCALS, stackdepth);
 
   if (jinfo->method->is_synchronized()) {
-    sub_imm(jinfo->codebuf, Rstack, Ristate, frame::interpreter_frame_monitor_size()*wordSize);
+    sub_imm(jinfo->codebuf, Rstack, Rstack, frame::interpreter_frame_monitor_size()*wordSize);
+    stackdepth = frame::interpreter_frame_monitor_size();
     if (jinfo->method->is_static()) {
       ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_POOL_HOLDER, 1, 0);
       ldr_imm(jinfo->codebuf, JAZ_V1, ARM_R3, KLASS_PART+KLASS_JAVA_MIRROR, 1, 0);
@@ -4633,39 +4226,34 @@
       ldr_imm(jinfo->codebuf, JAZ_V1, Rlocals, 0, 1, 0);
     }
     str_imm(jinfo->codebuf, JAZ_V1, Rstack, 4, 1, 0);
-  } else
-    mov_reg(jinfo->codebuf, Rstack, Ristate);
+  }
 
-  str_imm(jinfo->codebuf, ARM_R1, Ristate, ISTATE_MSG, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R1, Ristate, ISTATE_OOP_TEMP, 1, 0);
+  store_istate(jinfo, ARM_R1, ISTATE_MSG, stackdepth);
+  store_istate(jinfo, ARM_R1, ISTATE_OOP_TEMP, stackdepth);
 
   sub_imm(jinfo->codebuf, ARM_R3, Rstack, jinfo->method->max_stack() * sizeof(int));
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_JAVA_SP, 1, 0);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_JAVA_SP, 1, 0);
 
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK_BASE, 1, 0);
+  store_istate(jinfo, Rstack, ISTATE_STACK_BASE, stackdepth);
 
   sub_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_STACK_LIMIT, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_STACK_LIMIT, stackdepth);
 
-  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_TOP_ZERO_FRAME, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_NEXT_FRAME, stackdepth);
 
   mov_imm(jinfo->codebuf, ARM_R3, INTERPRETER_FRAME);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_FRAME_TYPE, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_FRAME_TYPE, stackdepth);
 
-  str_imm(jinfo->codebuf, Ristate, Ristate, ISTATE_MONITOR_BASE, 1, 0);
-
-  add_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_NEXT_FRAME);
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_TOP_ZERO_FRAME, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, ARM_R2, THREAD_LAST_JAVA_SP, 1, 0);
+  add_imm(jinfo->codebuf, ARM_R3, ISTATE_REG(jinfo), ISTATE(jinfo, stackdepth) + ISTATE_NEXT_FRAME);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  str_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
 
   ldr_imm(jinfo->codebuf, ARM_R3, ARM_IP, CONSTANTPOOL_CACHE, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R3, Ristate, ISTATE_CONSTANTS, 1, 0);
-
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_THREAD, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+  store_istate(jinfo, ARM_R3, ISTATE_CONSTANTS, stackdepth);
 
-  mov_reg(jinfo->codebuf, Rthread, ARM_R2);
+  store_istate(jinfo, Rthread, ISTATE_THREAD, stackdepth);
+  store_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
 
   if (jinfo->method->is_synchronized()) {
     unsigned loc_retry, loc_failed, loc_success, loc_exception;
@@ -4674,14 +4262,13 @@
     //
     // Try to acquire the monitor. Seems very sub-optimal
     // 		ldr	r3, [JAZ_V1, #0]
-    // 		sub	r1, Ristate, #8
     // 		orr	r3, r3, #1
-    // 		str	r3, [r1, #0]
+    // 		str	r3, [Rstack, #0]
     // 	retry:
     // 		ldrex	r0, [JAZ_V1, #0]
     // 		cmp	r3, r0
     // 		bne	failed
-    // 		strex	r0, r1, [JAZ_V1, #0]
+    // 		strex	r0, Rstack, [JAZ_V1, #0]
     // 		cbz	r0, success
     // 		b	retry
     // 	failed:
@@ -4690,15 +4277,14 @@
     // 		<success - acquired the monitor>
     //
     ldr_imm(jinfo->codebuf, ARM_R3, JAZ_V1, 0, 1, 0);
-    sub_imm(jinfo->codebuf, ARM_R1, Ristate, frame::interpreter_frame_monitor_size()*wordSize);
     orr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 1);
-    str_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
+    str_imm(jinfo->codebuf, ARM_R3, Rstack, 0, 1, 0);
     loc_retry = out_loc(jinfo->codebuf);
 // retry:
     ldrex_imm(jinfo->codebuf, ARM_R0, JAZ_V1, 0);
     cmp_reg(jinfo->codebuf, ARM_R3, ARM_R0);
     loc_failed = forward_16(jinfo->codebuf);
-    strex_imm(jinfo->codebuf, ARM_R0, ARM_R1, JAZ_V1, 0);
+    strex_imm(jinfo->codebuf, ARM_R0, Rstack, JAZ_V1, 0);
     loc_success = forward_16(jinfo->codebuf);
     branch_uncond(jinfo->codebuf, loc_retry);
     bcc_patch(jinfo->codebuf, COND_NE, loc_failed);
@@ -4709,8 +4295,6 @@
     bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
     cbz_patch(jinfo->codebuf, ARM_R0, loc_exception);
     cbz_patch(jinfo->codebuf, ARM_R0, loc_success);
-//    mov_imm(jinfo->codebuf, ARM_R0, 0+CONSTMETHOD_CODEOFFSET);
-//    bl(jinfo->codebuf, handlers[H_MONITOR]);
 // success:
 
   }
@@ -4721,11 +4305,11 @@
     for (i = 0; i < nlocals; i++) {
       Reg r = jinfo->jregs->r_local[i];
       if (r) {
-	unsigned stackdepth = 0;
-	if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
-	if (i < parms || (locals_info[i] & (1<<LOCAL_REF))) {
+	unsigned stackdepth = STACKDEPTH(jinfo, 0);
+        if (i < parms)
 	  load_local(jinfo, r, i, stackdepth);
-	}
+        else if (locals_info[i] & (1<<LOCAL_REF))
+          mov_reg(jinfo->codebuf, r, ARM_R1);
       }
     }
   }
@@ -4775,16 +4359,26 @@
   unsigned stackdepth;
 
   for (bci = start; bci < code_size; ) {
-#ifdef T2EE_PRINT_DISASS
-    unsigned start_idx = jinfo->codebuf->idx;
-    if (start_bci[start_idx] == -1) start_bci[start_idx] = bci;
-#endif
     opcode = code_base[bci];
     stackinfo = bc_stackinfo[bci];
+#ifdef T2_PRINT_DISASS
+    unsigned start_idx;
+#endif
 
     if (stackinfo & BC_BRANCH_TARGET) Thumb2_Flush(jinfo);
+
+    if (!OSPACE && (stackinfo & BC_BACK_TARGET)) {
+      if (out_pos(codebuf) & 0x02) nop_16(codebuf);
+      if (out_pos(codebuf) & 0x04) nop_32(codebuf);
+    }
+
+#ifdef T2_PRINT_DISASS
+    start_idx = jinfo->codebuf->idx;
+    if (start_bci[start_idx] == -1) start_bci[start_idx] = bci;
+#endif
+
     JASSERT(!(stackinfo & BC_COMPILED), "code already compiled for this bytecode?");
-    stackdepth = stackinfo & ~BC_FLAGS_MASK;
+    stackdepth = STACKDEPTH(jinfo, stackinfo);
     bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2) | BC_COMPILED;
 
     if (opcode > OPC_LAST_JAVA_OP && opcode != opc_return_register_finalizer) {
@@ -4795,13 +4389,13 @@
     len = Bytecodes::length_for((Bytecodes::Code)opcode);
     if (len <= 0) len = Bytecodes::special_length_at((address)(code_base+bci), (address)(code_base+code_size));
 
-    if (IS_DEAD(stackinfo) || IS_ZOMBIE(stackinfo)) {
+    if (IS_DEAD(stackinfo)) {
       unsigned zlen = 0;
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       unsigned start_bci = bci;
 #endif
 
-      Thumb2_Exit(jinfo, H_ZOMBIE, bci, stackdepth);
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
       do {
 	zlen += len;
 	bci += len;
@@ -4810,7 +4404,7 @@
 	stackinfo = bc_stackinfo[bci];
 
 	if (stackinfo & BC_BRANCH_TARGET) break;
-	if (!(IS_DEAD(stackinfo) || IS_ZOMBIE(stackinfo))) break;
+	if (!IS_DEAD(stackinfo)) break;
 
 	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
 
@@ -4823,21 +4417,20 @@
 	if (len <= 0) len = Bytecodes::special_length_at((address)(code_base+bci), (address)(code_base+code_size));
 
       } while (1);
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       end_bci[start_idx] = start_bci + zlen;
 #endif
-      jinfo->zombie_bytes += zlen;
       continue;
     }
 
 #if 0
-    if (bci >= 2620) {
+    if (bci >= 4) {
       unsigned zlen = 0;
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       unsigned start_bci = bci;
 #endif
 
-      Thumb2_Exit(jinfo, H_ZOMBIE, bci, stackdepth);
+      Thumb2_Exit(jinfo, H_DEADCODE, bci, stackdepth);
       do {
 	zlen += len;
 	bci += len;
@@ -4847,6 +4440,8 @@
 
 	if (stackinfo & BC_BRANCH_TARGET) break;
 
+	bc_stackinfo[bci] = (stackinfo & BC_FLAGS_MASK) | (codebuf->idx * 2);
+
 	if (opcode > OPC_LAST_JAVA_OP) {
 	  if (Bytecodes::is_defined((Bytecodes::Code)opcode))
 	    opcode = (unsigned)Bytecodes::java_code((Bytecodes::Code)opcode);
@@ -4856,15 +4451,14 @@
 	if (len <= 0) len = Bytecodes::special_length_at((address)(code_base+bci), (address)(code_base+code_size));
 
       } while (1);
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
       end_bci[start_idx] = start_bci + zlen;
 #endif
-      jinfo->zombie_bytes += zlen;
       continue;
     }
 #endif
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
     end_bci[start_idx] = bci + len;
 #endif
 
@@ -4923,16 +4517,6 @@
 	    v = (unsigned)constants->int_at(index);
 	    len += Thumb2_Imm(jinfo, v, bci+len);
 	    break;
-#if 0
-	  case JVM_CONSTANT_String:
-	    v = (unsigned)constants->resolved_string_at(index);
-	    len += Thumb2_Imm(jinfo, v, bci+len);
-	    break;
-	  case JVM_CONSTANT_Class:
-	    v = (unsigned)constants->resolved_klass_at(index)->klass_part()->java_mirror();
-	    len += Thumb2_Imm(jinfo, v, bci+len);
-	    break;
-#endif
 	  case JVM_CONSTANT_Long:
 	  case JVM_CONSTANT_Double: {
 	    unsigned long long v;
@@ -4946,7 +4530,7 @@
 	    Thumb2_Spill(jinfo, 1, 0);
 	    r = JSTACK_REG(jstack);
 	    PUSH(jstack, r);
-	    ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_METHOD, 1, 0);
+	    load_istate(jinfo, r, ISTATE_METHOD, stackdepth+1);
 	    ldr_imm(jinfo->codebuf, r, r, METHOD_CONSTANTS, 1, 0);
 	    ldr_imm(jinfo->codebuf, r, r, CONSTANTPOOL_BASE + (index << 2), 1, 0);
 	    if (v == JVM_CONSTANT_Class)
@@ -4960,8 +4544,8 @@
 	    Thumb2_Flush(jinfo);
 	    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  Thumb2_save_locals(jinfo, stackdepth);
-	    mov_imm(jinfo->codebuf, ARM_R1, opcode != opc_ldc);
-	    bl(jinfo->codebuf, handlers[H_LDC]);
+//	    mov_imm(jinfo->codebuf, ARM_R1, opcode != opc_ldc);
+	    bl(jinfo->codebuf, handlers[opcode == opc_ldc ? H_LDC : H_LDC_W]);
 	  Thumb2_restore_locals(jinfo, stackdepth);
 	    ldr_imm(jinfo->codebuf, ARM_R0, Rthread, THREAD_VM_RESULT, 1, 0);
 	    mov_imm(jinfo->codebuf, ARM_R2, 0);
@@ -5318,14 +4902,10 @@
 	  mov_reg(jinfo->codebuf, ARM_R0, r_lho);
 	  mov_reg(jinfo->codebuf, ARM_R1, r_rho);
 	}
-#if 1
 	if (opcode == opc_frem)
 	  bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
 	else
 	  blx(jinfo->codebuf, OPCODE2HANDLER(opcode));
-#else
-	bl(jinfo->codebuf, OPCODE2HANDLER(opcode));
-#endif
 	PUSH(jstack, ARM_R0);
 	break;
       }
@@ -5505,7 +5085,6 @@
 	  int nlocals = jinfo->method->max_locals();
 	  r = Thumb2_Tmp(jinfo, 0);
 	  stackdepth -= jstack->depth;
-	  if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
 	  load_local(jinfo, r, local, stackdepth);
 	  add_imm(jinfo->codebuf, r, r, constant);
 	  store_local(jinfo, r, local, stackdepth);
@@ -5540,7 +5119,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -5575,11 +5154,6 @@
 	break;
       }
 
-      case opc_monitorexit:
-      case opc_monitorenter:
-	  Thumb2_Exit(jinfo, H_MONITOR, bci, stackdepth);
-	  break;
-
       case opc_getstatic: {
 	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
         ConstantPoolCacheEntry* cache;
@@ -5603,7 +5177,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -5615,7 +5189,7 @@
 	  Thumb2_Spill(jinfo, 2, 0);
 	  r_hi = PUSH(jstack, JSTACK_REG(jstack));
 	  r_lo = PUSH(jstack, JSTACK_REG(jstack));
-	  ldr_imm(jinfo->codebuf, r_lo, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r_lo, ISTATE_CONSTANTS, stackdepth+2);
 	  ldr_imm(jinfo->codebuf, r_lo, r_lo, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  ldrd_imm(jinfo->codebuf, r_lo, r_hi, r_lo, field_offset, 1, 0);
 	} else {
@@ -5623,7 +5197,7 @@
 	  Thumb2_Spill(jinfo, 1, 0);
 	  r = JSTACK_REG(jstack);
 	  PUSH(jstack, r);
-	  ldr_imm(jinfo->codebuf, r, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r, ISTATE_CONSTANTS, stackdepth+1);
 	  ldr_imm(jinfo->codebuf, r, r, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  if (tos_type == btos)
 	    ldrsb_imm(jinfo->codebuf, r, r, field_offset, 1, 0);
@@ -5661,7 +5235,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -5719,7 +5293,7 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf, handlers[handler]);
-	  Thumb2_restore_locals(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	  Thumb2_restore_locals(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
@@ -5735,7 +5309,7 @@
 	  Thumb2_Spill(jinfo, 1, (1<<r_lo)|(1<<r_hi));
 	  r_obj = JSTACK_PREFER(jstack, ~((1<<r_lo)|(1<<r_hi)));
 	  JASSERT(r_obj != r_lo && r_obj != r_hi, "corruption in putstatic");
-	  ldr_imm(jinfo->codebuf, r_obj, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-2);
 	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  strd_imm(jinfo->codebuf, r_lo, r_hi, r_obj, field_offset, 1, 0);
 	} else {
@@ -5745,7 +5319,7 @@
 	  Thumb2_Spill(jinfo, 1, (1<<r));
 	  r_obj = JSTACK_PREFER(jstack, ~(1<<r));
 	  JASSERT(r_obj != r, "corruption in putstatic");
-	  ldr_imm(jinfo->codebuf, r_obj, Ristate, ISTATE_CONSTANTS, 1, 0);
+	  load_istate(jinfo, r_obj, ISTATE_CONSTANTS, stackdepth-1);
 	  ldr_imm(jinfo->codebuf, r_obj, r_obj, CP_OFFSET + (index << 4) + 4, 1, 0);
 	  if (tos_type == btos)
 	    strb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
@@ -5763,6 +5337,7 @@
 	break;
       }
 
+      case opc_invokevirtual:
       case opc_invokestatic:
       case opc_invokespecial: {
 	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
@@ -5778,13 +5353,14 @@
 	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
 	  mov_imm(jinfo->codebuf, ARM_R1, index);
 	  blx(jinfo->codebuf,
-	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC : H_INVOKESPECIAL]);
-	  Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	    handlers[opcode == opc_invokestatic ? H_INVOKESTATIC :
+		     opcode == opc_invokespecial ? H_INVOKESPECIAL : H_INVOKEVIRTUAL]);
+	  Thumb2_invoke_restore(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	  break;
 	}
 
-	callee = (methodOop)cache->f1();
-	if (callee->is_accessor()) {
+	callee = opcode == opc_invokevirtual ? (methodOop)cache->f2() : (methodOop)cache->f1();
+	if ((opcode != opc_invokevirtual || cache->is_vfinal()) && callee->is_accessor()) {
 	  u1 *code = callee->code_base();
 	  int index = GET_NATIVE_U2(&code[2]);
 	  constantPoolCacheOop callee_cache = callee->constants()->cache();
@@ -5792,11 +5368,6 @@
 	  Reg r_obj, r;
 
 	  if (entry->is_resolved(Bytecodes::_getfield)) {
-#if 0
-	    tty->print("Inlining accessor (opcode = %s) ", opcode == opc_invokestatic ? "invokestatic" : "invokespecial");
-	    callee->print_short_name(tty);
-	    tty->print("\n");
-#endif
 	    JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
 
 	    TosState tos_type = entry->flag_state();
@@ -5822,39 +5393,53 @@
 	}
 
 	Thumb2_Flush(jinfo);
-  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_CONSTANTS, 1, 0);
+
+	if (OSPACE) {
+	  Thumb2_invoke_save(jinfo, stackdepth);
+	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	  mov_imm(jinfo->codebuf, ARM_R1, index);
+	  blx(jinfo->codebuf, handlers[
+	      opcode == opc_invokestatic ? H_INVOKESTATIC_RESOLVED :
+	      opcode == opc_invokespecial ? H_INVOKESPECIAL_RESOLVED :
+	      cache->is_vfinal() ? H_INVOKEVFINAL : H_INVOKEVIRTUAL_RESOLVED]);
+	  Thumb2_invoke_restore(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
+	  break;
+	}
+
+	load_istate(jinfo, ARM_R2, ISTATE_METHOD, stackdepth);
 	mov_imm(jinfo->codebuf, ARM_R1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	if (opcode == opc_invokespecial)
+	if (opcode != opc_invokestatic)
 	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, CP_OFFSET + (index << 4) + 4, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	if (opcode == opc_invokespecial)
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 0, 1, 0); // Null pointer check - cbz better?
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  load_istate(jinfo, ARM_R0, ISTATE_CONSTANTS, stackdepth);
+	ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
+	if (opcode != opc_invokestatic)
+	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4, 1, 0);
+
+	if (opcode != opc_invokevirtual || cache->is_vfinal())
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0,
+		CP_OFFSET + (index << 4) + (opcode == opc_invokevirtual ? 8 : 4), 1, 0);
+	else
+	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2() * 4, 1, 0);
+
+	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
 	str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
 	ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_BCP, 1, 0);
+	store_istate(jinfo, ARM_R2, ISTATE_BCP, stackdepth);
 	str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  Thumb2_Debug(jinfo, H_DEBUG_METHODCALL);
 	Thumb2_invoke_save(jinfo, stackdepth);
-  sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
+	sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
+
 	ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-	mov_reg(jinfo->codebuf, ARM_R2, Rthread);
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK, 1, 0);
-add_imm(jinfo->codebuf, ARM_R3, ARM_R3, CODE_ALIGN_SIZE);
-//	enter_leave(jinfo->codebuf, 0);
+	store_istate(jinfo, Rstack, ISTATE_STACK, stackdepth+1);
+	add_imm(jinfo->codebuf, ARM_R3, ARM_R3, FAST_ENTRY_OFFSET);
 	blx_reg(jinfo->codebuf, ARM_R3);
-//	enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rthread, Ristate, ISTATE_THREAD, 1, 0);
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
-#endif
-	ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_STACK_LIMIT, 1, 0);
 	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
+	stackdepth = STACKDEPTH(jinfo, bc_stackinfo[bci+len]);
+	ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
+	load_istate(jinfo, ARM_R2, ISTATE_STACK_LIMIT, stackdepth);
 	ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+	Thumb2_invoke_restore(jinfo, stackdepth);
 	add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
 	ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
 	str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
@@ -5873,158 +5458,29 @@
 
 // Currently we just call the unresolved invokeinterface entry for resolved /
 // unresolved alike!
-    Thumb2_Flush(jinfo);
-    Thumb2_invoke_save(jinfo, stackdepth);
-    mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-    mov_imm(jinfo->codebuf, ARM_R1, index);
-    blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
-    Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	break;
-      }
-
-      case opc_invokevirtual: {
-	constantPoolCacheOop  cp = jinfo->method->constants()->cache();
-        ConstantPoolCacheEntry* cache;
-	int index = GET_NATIVE_U2(code_base+bci+1);
-	unsigned loc;
-
-        cache = cp->entry_at(index);
-        if (!cache->is_resolved((Bytecodes::Code)opcode)) {
-	  Thumb2_Flush(jinfo);
-	  Thumb2_invoke_save(jinfo, stackdepth);
-	  mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	  mov_imm(jinfo->codebuf, ARM_R1, index);
-	  blx(jinfo->codebuf, handlers[H_INVOKEVIRTUAL]);
-	  Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	  break;
-	}
-
-	if (cache->is_vfinal()) {
-	  methodOop callee = (methodOop)cache->f2();
-	  if (callee->is_accessor()) {
-	    u1 *code = callee->code_base();
-	    int index = GET_NATIVE_U2(&code[2]);
-	    constantPoolCacheOop callee_cache = callee->constants()->cache();
-	    ConstantPoolCacheEntry *entry = callee_cache->entry_at(index);
-	    Reg r_obj, r;
-
-	    if (entry->is_resolved(Bytecodes::_getfield)) {
-#if 0
-	      tty->print("Inlining accessor (opcode = invokevfinal) ");
-	      callee->print_short_name(tty);
-	      tty->print("\n");
-#endif
-	      JASSERT(cache->parameter_size() == 1, "not 1 parameter to accessor");
-
-	      TosState tos_type = entry->flag_state();
-	      int field_offset = entry->f2();
-
-	      JASSERT(tos_type == btos || tos_type == ctos || tos_type == stos || tos_type == atos || tos_type == itos, "not itos or atos");
-
-	      Thumb2_Fill(jinfo, 1);
-	      r_obj = POP(jstack);
-	      Thumb2_Spill(jinfo, 1, 0);
-	      r = JSTACK_REG(jstack);
-	      PUSH(jstack, r);
-	      if (tos_type == btos)
-		ldrb_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      else if (tos_type == ctos)
-		ldrh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      else if (tos_type == stos)
-		ldrsh_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      else
-		ldr_imm(jinfo->codebuf, r, r_obj, field_offset, 1, 0);
-	      break;
-	    }
-	  }
-	}
-
 	Thumb2_Flush(jinfo);
-	if (cache->is_vfinal()) {
-  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_CONSTANTS, 1, 0);
-	  mov_imm(jinfo->codebuf, ARM_R1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, CP_OFFSET + (index << 4) + 8, 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 0, 1, 0); // Null pointer check - cbz better?
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  Thumb2_Debug(jinfo, H_DEBUG_METHODCALL);
-	Thumb2_invoke_save(jinfo, stackdepth);
-  sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_BCP, 1, 0);
-	  mov_reg(jinfo->codebuf, ARM_R2, Rthread);
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK, 1, 0);
-add_imm(jinfo->codebuf, ARM_R3, ARM_R3, CODE_ALIGN_SIZE);
-//	  enter_leave(jinfo->codebuf, 0);
-	  blx_reg(jinfo->codebuf, ARM_R3);
-//	  enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rthread, Ristate, ISTATE_THREAD, 1, 0);
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
-#endif
-	  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_STACK_LIMIT, 1, 0);
-	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R3, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
-	  break;
-	} else {
-  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rstack, (cache->parameter_size()-1) * sizeof(int), 1, 0);
-  ldr_imm(jinfo->codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R3, 4, 1, 0);
-	  mov_imm(jinfo->codebuf, ARM_R1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R0, ARM_R3, INSTANCEKLASS_VTABLE_OFFSET + cache->f2() * 4, 1, 0);
-  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, bci+CONSTMETHOD_CODEOFFSET);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R1, ARM_R0, METHOD_FROM_INTERPRETED, 1, 0);
-  str_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_BCP, 1, 0);
-	  str_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  Thumb2_Debug(jinfo, H_DEBUG_METHODCALL);
 	Thumb2_invoke_save(jinfo, stackdepth);
-  sub_imm(jinfo->codebuf, Rstack, Rstack, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, ARM_R1, 0, 1, 0);
-	  mov_reg(jinfo->codebuf, ARM_R2, Rthread);
-  str_imm(jinfo->codebuf, Rstack, Ristate, ISTATE_STACK, 1, 0);
-add_imm(jinfo->codebuf, ARM_R3, ARM_R3, CODE_ALIGN_SIZE);
-//	  enter_leave(jinfo->codebuf, 0);
-	  blx_reg(jinfo->codebuf, ARM_R3);
-//	  enter_leave(jinfo->codebuf, 1);
-  ldr_imm(jinfo->codebuf, Rthread, Ristate, ISTATE_THREAD, 1, 0);
-#ifdef USE_RLOCAL
-  ldr_imm(jinfo->codebuf, Rlocals, Ristate, ISTATE_LOCALS, 1, 0);
-#endif
-	  ldr_imm(jinfo->codebuf, Rstack, Rthread, THREAD_JAVA_SP, 1, 0);
-	  ldr_imm(jinfo->codebuf, ARM_R2, Ristate, ISTATE_STACK_LIMIT, 1, 0);
-	JASSERT(!(bc_stackinfo[bci+len] & BC_COMPILED), "code already compiled for this bytecode?");
-	Thumb2_invoke_restore(jinfo, bc_stackinfo[bci+len] & ~BC_FLAGS_MASK);
-	  ldr_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
-	  add_imm(jinfo->codebuf, ARM_R2, ARM_R2, 4);
-	  ldr_imm(jinfo->codebuf, ARM_R3, Rthread, THREAD_PENDING_EXC, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R2, Rthread, THREAD_JAVA_SP, 1, 0);
-	  str_imm(jinfo->codebuf, ARM_R1, Rthread, THREAD_LAST_JAVA_SP, 1, 0);
-	cmp_imm(jinfo->codebuf, ARM_R3, 0);
-	it(jinfo->codebuf, COND_NE, IT_MASK_T);
-	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION_NO_REGS]);
-	}
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	mov_imm(jinfo->codebuf, ARM_R1, index);
+	blx(jinfo->codebuf, handlers[H_INVOKEINTERFACE]);
+	Thumb2_invoke_restore(jinfo, STACKDEPTH(jinfo, bc_stackinfo[bci+len]));
 	break;
       }
 
       case opc_jsr_w:
       case opc_jsr: {
-	Thumb2_Jsr(jinfo , bci, stackdepth);
+	int offset = opcode == opc_jsr ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
+	Reg r;
+
+	Thumb2_Spill(jinfo, 1, 0);
+	r = JSTACK_REG(jstack);
+	PUSH(jstack, r);
+	mov_imm(jinfo->codebuf, r, bci + ((opcode == opc_jsr) ? 3 : 5));
+	Thumb2_Flush(jinfo);
+	bci = Thumb2_Goto(jinfo, bci, offset, len);
+	len = 0;
 	break;
       }
 
@@ -6033,25 +5489,20 @@
 	break;
       }
 
-      case opc_athrow:
-	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
-	break;
-
-      case opc_goto: {
-	int offset = GET_JAVA_S2(jinfo->code_base + bci + 1);
+      case opc_goto:
+      case opc_goto_w: {
+	int offset = opcode == opc_goto ?
+		GET_JAVA_S2(jinfo->code_base + bci + 1) :
+		GET_JAVA_U4(jinfo->code_base + bci + 1);
 	Thumb2_Flush(jinfo);
 	bci = Thumb2_Goto(jinfo, bci, offset, len);
 	len = 0;
 	break;
       }
 
-      case opc_goto_w: {
-	int offset = GET_JAVA_U4(jinfo->code_base + bci + 1);
-	Thumb2_Flush(jinfo);
-	bci = Thumb2_Goto(jinfo, bci, offset, len);
-	len = 0;
+      case opc_athrow:
+	Thumb2_Exit(jinfo, H_ATHROW, bci, stackdepth);
 	break;
-      }
 
       case opc_ifeq:
       case opc_ifne:
@@ -6100,8 +5551,7 @@
       case opc_ireturn:
       case opc_freturn:
       case opc_areturn:
-	Thumb2_Return(jinfo, opcode);
-	if (!jinfo->compiled_return) jinfo->compiled_return = bci;
+	Thumb2_Return(jinfo, opcode, bci, stackdepth);
 	break;
 
       case opc_return_register_finalizer: {
@@ -6119,12 +5569,12 @@
 	loc_eq = forward_16(jinfo->codebuf);
 	Thumb2_save_locals(jinfo, stackdepth);
 	mov_reg(jinfo->codebuf, ARM_R1, r);
-	ldr_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+	load_istate(jinfo, ARM_R0, ISTATE_METHOD, stackdepth);
 	ldr_imm(jinfo->codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
 	add_imm(jinfo->codebuf, ARM_R0, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
-	str_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+	store_istate(jinfo, ARM_R0, ISTATE_BCP, stackdepth);
 	sub_imm(jinfo->codebuf, ARM_R0, Rstack, 4);
-	str_imm(jinfo->codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
+	store_istate(jinfo, ARM_R0, ISTATE_STACK, stackdepth);
 
 	mov_reg(jinfo->codebuf, ARM_R0, Rthread);
 	mov_imm(jinfo->codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime18register_finalizerEP10JavaThreadP7oopDesc);
@@ -6135,7 +5585,7 @@
 	it(jinfo->codebuf, COND_NE, IT_MASK_T);
 	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
 	bcc_patch(jinfo->codebuf, COND_EQ, loc_eq);
-	Thumb2_Return(jinfo, opc_return);
+	Thumb2_Return(jinfo, opc_return, bci, stackdepth);
 	break;
       }
 
@@ -6217,6 +5667,31 @@
 	break;
       }
 
+      case opc_monitorenter:
+	Thumb2_Flush(jinfo);
+	Thumb2_invoke_save(jinfo, stackdepth);
+	mov_imm(jinfo->codebuf, ARM_R0, bci+CONSTMETHOD_CODEOFFSET);
+	bl(jinfo->codebuf, handlers[H_MONITORENTER]);
+	Thumb2_invoke_restore(jinfo, stackdepth);
+	break;
+
+      case opc_monitorexit: {
+	Reg r;
+
+	Thumb2_Fill(jinfo, 1);
+	r = POP(jstack);
+	Thumb2_Flush(jinfo);
+	mov_reg(jinfo->codebuf, ARM_R1, r);
+	mov_imm(jinfo->codebuf, ARM_R3, bci+CONSTMETHOD_CODEOFFSET);
+        Thumb2_save_locals(jinfo, stackdepth);
+	bl(jinfo->codebuf, handlers[H_MONITOREXIT]);
+        Thumb2_restore_locals(jinfo, stackdepth);
+	cmp_imm(jinfo->codebuf, ARM_R0, 0);
+	it(jinfo->codebuf, COND_NE, IT_MASK_T);
+	bl(jinfo->codebuf, handlers[H_HANDLE_EXCEPTION]);
+	break;
+      }
+
       case opc_newarray: {
 	Reg r;
 	unsigned loc;
@@ -6315,6 +5790,8 @@
 	Thumb2_Fill(jinfo, 1);
 	r = POP(jstack);
 
+	Thumb2_Flush(jinfo);
+
 	table_loc = out_loc(jinfo->codebuf);
 	for (i = 0, tablep = table; i < npairs; i++) {
 	  unsigned match;
@@ -6381,6 +5858,7 @@
 
 	Thumb2_Fill(jinfo, 1);
 	rs = POP(jstack);
+	Thumb2_Flush(jinfo);
 	r = Thumb2_Tmp(jinfo, (1<<rs));
 	sub_imm(jinfo->codebuf, r, rs, low);
 	cmp_imm(jinfo->codebuf, r, (high-low)+1);
@@ -6457,7 +5935,6 @@
 	    int nlocals = jinfo->method->max_locals();
 	    r = ARM_IP;
 	    stackdepth -= jstack->depth;
-	    if (jinfo->method->is_synchronized()) stackdepth += frame::interpreter_frame_monitor_size();
 	    load_local(jinfo, r, local, stackdepth);
 	    add_imm(jinfo->codebuf, r, r, constant);
 	    store_local(jinfo, r, local, stackdepth);
@@ -6488,7 +5965,7 @@
 	break;
     }
     bci += len;
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
     if (len == 0) {
       if (start_idx == jinfo->codebuf->idx) start_bci[start_idx] = -1;
     } else
@@ -6640,113 +6117,17 @@
   return 0;
 }
 
-static int DebugSwitch = 1;
-
-extern "C" void Debug_Ignore_Safepoints(void)
-{
-	printf("Ignore Safepoints\n");
-}
-
-extern "C" void Debug_Notice_Safepoints(void)
-{
-	printf("Notice Safepoints\n");
-}
-
-extern "C" void Debug_ExceptionReturn(interpreterState istate, intptr_t *stack)
-{
-  JavaThread *thread = istate->thread();
-
-  if (thread->has_pending_exception()) {
-    Handle ex(thread, thread->pending_exception());
-    tty->print_cr("Exception %s", Klass::cast(ex->klass())->external_name());
-  }
-}
-
-extern "C" void Debug_Stack(intptr_t *stack)
-{
-  int i;
-  char msg[16];
-
-  tty->print("  Stack:");
-  for (i = 0; i < 6; i++) {
-    tty->print(" [");
-    sprintf(msg, "%d", i);
-    tty->print(msg);
-    tty->print("] = ");
-    sprintf(msg, "%08x", (int)stack[i]);
-    tty->print(msg);
-  }
-  tty->cr();
-}
-
-extern "C" void Debug_MethodEntry(interpreterState istate, intptr_t *stack, methodOop callee)
-{
-#if 0
-  if (DebugSwitch) {
-    methodOop method = istate->method();
-    tty->print("Entering ");
-    callee->print_short_name(tty);
-    tty->print(" from ");
-    method->print_short_name(tty);
-    tty->cr();
-    Debug_Stack(stack);
-    tty->flush();
-  }
-#endif
-}
-
-extern "C" void Debug_MethodExit(interpreterState istate, intptr_t *stack)
-{
-  if (DebugSwitch) {
-    methodOop method = istate->method();
-    JavaThread *thread = istate->thread();
-    oop exc = thread->pending_exception();
-
-    if (!exc) return;
-    tty->print("Leaving ");
-    method->print_short_name(tty);
-    tty->cr();
-    Debug_Stack(stack);
-    tty->flush();
-    if (exc) tty->print_cr("Exception %s", exc->print_value_string());
-  }
-}
-
-extern "C" void Debug_MethodCall(interpreterState istate, intptr_t *stack, methodOop callee)
-{
-#if 0
-  if (DebugSwitch) {
-    methodOop method = istate->method();
-    tty->print("Calling ");
-    callee->print_short_name(tty);
-    tty->print(" from ");
-    method->print_short_name(tty);
-    tty->cr();
-    Debug_Stack(stack);
-    tty->flush();
-  }
-#endif
-}
-
-extern "C" int Debug_irem_Handler(int a, int b)
-{
-	printf("%d %% %d\n", a, b);
-	return a%b;
-}
-
 extern "C" void Thumb2_Install(methodOop mh, u32 entry);
 
-#define IS_COMPILED(e, cb) ((e) >= (unsigned)(cb) && (e) < (unsigned)(cb) + (cb)->size)
-
 extern "C" unsigned cmpxchg_ptr(unsigned new_value, volatile unsigned *ptr, unsigned cmp_value);
 static volatile unsigned compiling;
 static unsigned CompileCount = 0;
-static unsigned MaxCompile = 130;
+static unsigned MaxCompile = 10000;
 
 #define COMPILE_ONLY	0
 #define COMPILE_COUNT	0
 #define DISASS_AFTER	0
-//#define COMPILE_LIST	0
+//#define COMPILE_LIST
 
 #ifdef COMPILE_LIST
 static const char *compile_list[] = {
@@ -6756,10 +6137,9 @@
 
 static unsigned compiled_methods = 0;
 
-#ifdef T2EE_PRINT_STATISTICS
+#ifdef T2_PRINT_STATISTICS
 static unsigned bytecodes_compiled = 0;
 static unsigned arm_code_generated = 0;
-static unsigned total_zombie_bytes = 0;
 static clock_t total_compile_time = 0;
 #endif
 
@@ -6805,7 +6185,7 @@
     compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
     if (compiled_offset == 0) return 0;
     thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
-    thumb_entry.osr_entry = (unsigned)cmethod->osr_entry | TBIT;
+    thumb_entry.regusage = cmethod->regusage;
     return *(unsigned long long *)&thumb_entry;
   }
 
@@ -6820,8 +6200,7 @@
   // Othersize we have difficulty access the locals from the stack pointer
   //
   if (code_size > THUMB2_MAX_BYTECODE_SIZE ||
-		(method->max_locals() + method->max_stack()) >= 1000 ||
-		method->has_monitor_bytecodes()) {
+		(method->max_locals() + method->max_stack()) >= 1000) {
         method->set_not_compilable();
 	return 0;
   }
@@ -6840,7 +6219,10 @@
 		if (strcmp(s, method->name_and_sig_as_C_string()) == 0)
 			break;
 	}
-	if (!s) return 0;
+	if (!s) {
+		method->set_not_compilable();
+		return 0;
+	}
   }
 #endif
 
@@ -6857,23 +6239,24 @@
 
   if (cmpxchg_ptr(1, &compiling, 0)) return 0;
 
-#ifdef T2EE_PRINT_STATISTICS
+#ifdef T2_PRINT_STATISTICS
   clock_t compile_time = clock();
 #endif
 
-#ifdef T2EE_PRINT_COMPILATION
-  if (t2ee_print_compilation) {
-    fprintf(stderr, "Compiling %d %c%c %s\n",
+#ifdef T2_PRINT_COMPILATION
+  if (t2_print_compilation) {
+    fprintf(stderr, "Compiling %d %c%c%c %s\n",
 	compiled_methods,
 	method->is_synchronized() ? 'S' : ' ',
 	method->has_exception_handler() ? 'E' : ' ',
+	method->has_monitor_bytecodes() ? 'M' : ' ',
 	method->name_and_sig_as_C_string());
   }
 #endif
 
   memset(bc_stackinfo, 0, code_size * sizeof(unsigned));
   memset(locals_info, 0, method->max_locals() * sizeof(unsigned));
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
   memset(start_bci, 0xff, sizeof(start_bci));
   memset(end_bci, 0xff, sizeof(end_bci));
 #endif
@@ -6885,13 +6268,13 @@
   jinfo_str.bc_stackinfo = bc_stackinfo;
   jinfo_str.locals_info = locals_info;
   jinfo_str.compiled_return = 0;
-  jinfo_str.zombie_bytes = 0;
+  for (int i = 0; i < 12; i++) jinfo_str.compiled_word_return[i] = 0;
   jinfo_str.is_leaf = 1;
+  jinfo_str.use_istate = method->has_monitor_bytecodes();
 
   Thumb2_local_info_from_sig(&jinfo_str, method, base);
 
-  Thumb2_pass1(&jinfo_str, 0);
-  Thumb2_pass2(&jinfo_str, 0, 0);
+  Thumb2_pass1(&jinfo_str, 0, 0);
 
   codebuf_str.codebuf = (unsigned short *)cb->hp;
   codebuf_str.idx = 0;
@@ -6912,16 +6295,14 @@
   jregs_str.pregs[1] = JAZ_V2;
   jregs_str.pregs[2] = JAZ_V3;
   jregs_str.pregs[3] = JAZ_V4;
-
-#ifndef USE_RLOCAL
   jregs_str.pregs[4] = JAZ_V5;
-#endif
+  jregs_str.pregs[5] = JAZ_V6;
 
   jregs_str.npregs = PREGS;
 
   Thumb2_RegAlloc(&jinfo_str);
 
-  slow_entry = out_align(&codebuf_str, CODE_ALIGN);
+  slow_entry = out_align_offset(&codebuf_str, CODE_ALIGN, SLOW_ENTRY_OFFSET);
   cmethod = (Compiled_Method *)slow_entry;
   slow_entry |= TBIT;
 
@@ -6937,9 +6318,9 @@
     compiled_accessor = 0;
   }
 
-#ifdef T2EE_PRINT_DISASS
+#ifdef T2_PRINT_DISASS
   if (DISASS_AFTER == 0 || compiled_methods >= DISASS_AFTER)
-    if (t2ee_print_disass)
+    if (t2_print_disass)
 	Thumb2_disass(&jinfo_str);
 #endif
 
@@ -6948,15 +6329,14 @@
 
   Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf_str.idx * 2);
 
-#ifdef T2EE_PRINT_STATISTICS
+#ifdef T2_PRINT_STATISTICS
   compile_time = clock() - compile_time;
   total_compile_time += compile_time;
 
-  if (t2ee_print_statistics) {
+  if (t2_print_statistics) {
     unsigned codegen = codebuf_str.idx * 2;
     bytecodes_compiled += code_size;
     arm_code_generated += codegen;
-    total_zombie_bytes += jinfo_str.zombie_bytes;
     fprintf(stderr, "%d bytecodes => %d bytes code in %.2f sec, totals: %d => %d in %.2f sec\n",
       code_size, codegen, (double)compile_time/(double)CLOCKS_PER_SEC,
     bytecodes_compiled, arm_code_generated, (double)total_compile_time/(double)CLOCKS_PER_SEC);
@@ -6984,7 +6364,7 @@
   compiled_offset = Thumb2_osr_from_bci(cmethod, branch_pc);
   if (compiled_offset == 0) return 0;
   thumb_entry.compiled_entrypoint = slow_entry + compiled_offset;
-  thumb_entry.osr_entry = (unsigned)cmethod->osr_entry | TBIT;
+  thumb_entry.regusage = cmethod->regusage;
   return *(unsigned long long *)&thumb_entry;
 }
 
@@ -6994,6 +6374,7 @@
 extern "C" void Thumb2_Handle_Exception_NoRegs(void);
 extern "C" void Thumb2_Exit_To_Interpreter(void);
 extern "C" void Thumb2_Stack_Overflow(void);
+extern "C" void Thumb2_monitorenter(void);
 
 extern "C" void __divsi3(void);
 extern "C" void __aeabi_ldivmod(void);
@@ -7006,6 +6387,7 @@
 extern "C" void Helper_new(void);
 extern "C" void Helper_instanceof(void);
 extern "C" void Helper_checkcast(void);
+extern "C" void Helper_monitorexit(void);
 extern "C" void Helper_aastore(void);
 extern "C" void Helper_aputfield(void);
 extern "C" void Helper_synchronized_enter(void);
@@ -7049,6 +6431,11 @@
 extern char Thumb2_putstatic_a_stub[];
 extern char Thumb2_putstatic_dw_stub[];
 
+extern char Thumb2_invokestaticresolved_stub[];
+extern char Thumb2_invokespecialresolved_stub[];
+extern char Thumb2_invokevirtualresolved_stub[];
+extern char Thumb2_invokevfinalresolved_stub[];
+
 #define STUBS_SIZE	(Thumb2_stubs_end-Thumb2_stubs)
 #define IDIV_STUB		(Thumb2_idiv_stub-Thumb2_stubs)
 #define IREM_STUB		(Thumb2_irem_stub-Thumb2_stubs)
@@ -7077,13 +6464,18 @@
 #define PUTSTATIC_A_STUB	(Thumb2_putstatic_a_stub-Thumb2_stubs)
 #define PUTSTATIC_DW_STUB	(Thumb2_putstatic_dw_stub-Thumb2_stubs)
 
+#define INVOKESTATIC_RESOLVED_STUB (Thumb2_invokestaticresolved_stub-Thumb2_stubs)
+#define INVOKESPECIAL_RESOLVED_STUB (Thumb2_invokespecialresolved_stub-Thumb2_stubs)
+#define INVOKEVIRTUAL_RESOLVED_STUB (Thumb2_invokevirtualresolved_stub-Thumb2_stubs)
+#define INVOKEVFINAL_RESOLVED_STUB (Thumb2_invokevfinalresolved_stub-Thumb2_stubs)
+
 extern "C" void Thumb2_NullPtr_Handler(void);
 
 
 extern "C" int Thumb2_Check_Null(unsigned *regs, unsigned pc)
 {
   Thumb2_CodeBuf *cb = thumb2_codebuf;
-  if (!(CPUInfo & ARCH_THUMBEE)) return 0;
+  if (!(CPUInfo & ARCH_THUMB2)) return 0;
   if (IS_COMPILED(pc, cb)) {
     regs[ARM_LR] = pc;
     regs[ARM_PC] = (unsigned)Thumb2_NullPtr_Handler;
@@ -7101,23 +6493,24 @@
   u32 loc_irem, loc_idiv, loc_ldiv;
   int rc;
 
-  if (!(CPUInfo & ARCH_THUMBEE)) {
+  if (!(CPUInfo & ARCH_THUMB2)) {
     DisableCompiler = 1;
     return;
   }
 
-#ifdef T2EE_PRINT_COMPILATION
-  t2ee_print_compilation = getenv("T2EE_PRINT_COMPILATION");
+#ifdef T2_PRINT_COMPILATION
+  t2_print_compilation = getenv("T2_PRINT_COMPILATION");
 #endif
-#ifdef T2EE_PRINT_STATISTICS
-  t2ee_print_statistics = getenv("T2EE_PRINT_STATISTICS");
+#ifdef T2_PRINT_STATISTICS
+  t2_print_statistics = getenv("T2_PRINT_STATISTICS");
 #endif
-#ifdef T2EE_PRINT_DISASS
-  t2ee_print_disass = getenv("T2EE_PRINT_DISASS");
+#ifdef T2_PRINT_DISASS
+  t2_print_disass = getenv("T2_PRINT_DISASS");
 #endif
-#ifdef T2EE_PRINT_REGUSAGE
-  t2ee_print_regusage = getenv("T2EE_PRINT_REGUSAGE");
+#ifdef T2_PRINT_REGUSAGE
+  t2_print_regusage = getenv("T2_PRINT_REGUSAGE");
 #endif
+  t2_ospace = getenv("T2_OSPACE");
 
   cb = (Thumb2_CodeBuf *)mmap(0, THUMB2_CODEBUF_SIZE, PROT_EXEC|PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
   if (cb == MAP_FAILED) {
@@ -7138,7 +6531,6 @@
     return;
   }
 
-#if 1
   memcpy(cb->hp, Thumb2_stubs, STUBS_SIZE);
 
   handlers[H_IDIV] = (unsigned)(cb->hp + IDIV_STUB);
@@ -7148,6 +6540,11 @@
   handlers[H_INVOKESTATIC] = (unsigned)(cb->hp + INVOKESTATIC_STUB);
   handlers[H_INVOKESPECIAL] = (unsigned)(cb->hp + INVOKESPECIAL_STUB);
 
+  handlers[H_INVOKESTATIC_RESOLVED] = (unsigned)(cb->hp + INVOKESTATIC_RESOLVED_STUB);
+  handlers[H_INVOKESPECIAL_RESOLVED] = (unsigned)(cb->hp + INVOKESPECIAL_RESOLVED_STUB);
+  handlers[H_INVOKEVIRTUAL_RESOLVED] = (unsigned)(cb->hp + INVOKEVIRTUAL_RESOLVED_STUB);
+  handlers[H_INVOKEVFINAL] = (unsigned)(cb->hp + INVOKEVFINAL_RESOLVED_STUB);
+
   handlers[H_GETFIELD_WORD] = (unsigned)(cb->hp + GETFIELD_WORD_STUB);
   handlers[H_GETFIELD_SH] = (unsigned)(cb->hp + GETFIELD_SH_STUB);
   handlers[H_GETFIELD_H] = (unsigned)(cb->hp + GETFIELD_H_STUB);
@@ -7173,7 +6570,6 @@
   handlers[H_PUTSTATIC_DW] = (unsigned)(cb->hp + PUTSTATIC_DW_STUB);
 
   codebuf.idx += (Thumb2_stubs_end-Thumb2_stubs) >> 1;
-#endif
 
   handlers[H_LDIV] = handlers[H_LREM] = out_pos(&codebuf);
   dop_reg(&codebuf, DP_ORR, ARM_IP, ARM_R2, ARM_R3, 0, 0);
@@ -7253,7 +6649,8 @@
 //   r3 = bci
 //   result -> R0, == 0 => exception
   handlers[H_NEW] = out_pos(&codebuf);
-  mov_reg(&codebuf, ARM_R0, Ristate);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
   ldr_imm(&codebuf, ARM_R2, ARM_R0, ISTATE_METHOD, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_new);
   ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
@@ -7269,14 +6666,15 @@
 //   r3 = bci
 //   result -> thread->vm_result
   handlers[H_NEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
-  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_R3, ARM_R0, ARM_R3);
   mov_reg(&codebuf, ARM_R0, Rthread);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_BCP, 1, 0);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+  mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime8newarrayEP10JavaThread9BasicTypei);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // ANEWARRAY Stub
@@ -7285,14 +6683,18 @@
 //   r3 = tos
 //   result -> thread->vm_result
   handlers[H_ANEWARRAY] = out_pos(&codebuf);
-sub_imm(&codebuf, ARM_R1, Rstack, 4);
-  str_imm(&codebuf, ARM_R1, Ristate, ISTATE_STACK, 1, 0);
-  ldr_imm(&codebuf, ARM_R1, Ristate, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R1);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+
+  sub_imm(&codebuf, ARM_R1, Rstack, 4);
+  str_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+
+  ldr_imm(&codebuf, ARM_R1, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_R1, ARM_R1, METHOD_CONSTANTS, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_IP, ARM_R0);
   mov_imm(&codebuf, ARM_IP, (u32)_ZN18InterpreterRuntime9anewarrayEP10JavaThreadP19constantPoolOopDescii);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
   mov_reg(&codebuf, ARM_R0, Rthread);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
@@ -7300,14 +6702,15 @@
 //   r0 = bci
 //   r1 = dimensions (*4)
   handlers[H_MULTIANEWARRAY] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   sub_imm(&codebuf, ARM_R3, Rstack, 4);
   ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
   add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
   add_reg(&codebuf, Rstack, Rstack, ARM_R1);
   mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime14multianewarrayEP10JavaThreadPi);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
   mov_reg(&codebuf, ARM_R0, Rthread);
   sub_imm(&codebuf, ARM_R1, Rstack, 4);
   mov_reg(&codebuf, ARM_PC, ARM_R3);
@@ -7315,45 +6718,91 @@
 // LDC Stub
 //   r0 = bci
   handlers[H_LDC] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R2, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+  add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
+  mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+  mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_imm(&codebuf, ARM_R1, 0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// LDC_W Stub
+//   r0 = bci
+  handlers[H_LDC_W] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   sub_imm(&codebuf, ARM_R3, Rstack, 4);
   ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
+  str_imm(&codebuf, ARM_R3, ARM_IP, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
   add_reg(&codebuf, ARM_R0, ARM_R2, ARM_R0);
   mov_imm(&codebuf, ARM_R3, (u32)_ZN18InterpreterRuntime3ldcEP10JavaThreadb);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
+  str_imm(&codebuf, ARM_R0, ARM_IP, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
   mov_reg(&codebuf, ARM_R0, Rthread);
-//  mov_imm(&codebuf, ARM_R1, 0);
+  mov_imm(&codebuf, ARM_R1, 1);
   mov_reg(&codebuf, ARM_PC, ARM_R3);
 
 // INSTANCEOF Stub
 //   r1 = index
+//   r2 = tos
 //   r3 = bci
 //   result -> R0, == -1 => exception
   handlers[H_INSTANCEOF] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_instanceof);
-  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R3);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Ristate);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // CHECKCAST Stub
 //   r1 = index
+//   r2 = tos
 //   r3 = bci
 //   result -> R0, != 0 => exception
   handlers[H_CHECKCAST] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R0, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_checkcast);
-  ldr_imm(&codebuf, ARM_R0, ARM_R0, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_R0, ARM_R0, ARM_R3);
-sub_imm(&codebuf, ARM_R3, Rstack, 4);
-  str_imm(&codebuf, ARM_R3, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_BCP, 1, 0);
-  mov_reg(&codebuf, ARM_R0, Ristate);
+  mov_reg(&codebuf, ARM_PC, ARM_IP);
+
+// MONITORENTER Stub
+//   r0 = bci
+  handlers[H_MONITORENTER] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_monitorenter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
+  mov_reg(&codebuf, ARM_PC, ARM_R3);
+
+// MONITOREXIT Stub
+//   r1 = tos
+//   r3 = bci
+//   result -> R0, != 0 => exception
+  handlers[H_MONITOREXIT] = out_pos(&codebuf);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  ldr_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R3, ARM_IP, ARM_R3);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R3, Rstack, 4);
+  str_imm(&codebuf, ARM_R3, ARM_R0, ISTATE_STACK, 1, 0);
+  mov_imm(&codebuf, ARM_IP, (u32)Helper_monitorexit);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // AASTORE Stub
@@ -7362,14 +6811,16 @@
 //   r2 = index
 //   r3 = arrayref
   handlers[H_AASTORE] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_IP, ARM_IP, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_IP, Ristate, ISTATE_BCP, 1, 0);
+  ldr_imm(&codebuf, ARM_R0, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  sub_imm(&codebuf, ARM_R0, ARM_R0, ISTATE_NEXT_FRAME);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_IP, Rstack, 4);
+  str_imm(&codebuf, ARM_IP, ARM_R0, ISTATE_STACK, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_aastore);
-  mov_reg(&codebuf, ARM_R0, Ristate);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 // APUTFIELD Stub
@@ -7380,16 +6831,20 @@
 
 // SYNCHRONIZED_ENTER Stub
 //   r0 = bci
-//   r1 = monitor
+//   Rstack = monitor
   handlers[H_SYNCHRONIZED_ENTER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Ristate, ISTATE_METHOD, 1, 0);
-  ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
-  add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_IP, Ristate, ISTATE_BCP, 1, 0);
+  ldr_imm(&codebuf, ARM_R1, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, ARM_R2, METHOD_CONSTMETHOD, 1, 0);
+  add_reg(&codebuf, ARM_R2, ARM_R2, ARM_R0);
+  str_imm(&codebuf, ARM_R2, ARM_R1, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
+
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R1, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+
   mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_enter);
   mov_reg(&codebuf, ARM_R0, Rthread);
+  mov_reg(&codebuf, ARM_R1, Rstack);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
 //
@@ -7397,54 +6852,26 @@
 //   r0 = bci
 //   r1 = monitor
   handlers[H_SYNCHRONIZED_EXIT] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_IP, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+
+  ldr_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_IP, ARM_IP, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, ARM_IP, ARM_IP, ARM_R0);
-sub_imm(&codebuf, ARM_R0, Rstack, 4);
-  str_imm(&codebuf, ARM_R0, Ristate, ISTATE_STACK, 1, 0);
-  str_imm(&codebuf, ARM_IP, Ristate, ISTATE_BCP, 1, 0);
+  sub_imm(&codebuf, ARM_R0, Rstack, 4);
+  str_imm(&codebuf, ARM_R0, ARM_R2, ISTATE_STACK-ISTATE_NEXT_FRAME, 1, 0);
+  str_imm(&codebuf, ARM_IP, ARM_R2, ISTATE_BCP-ISTATE_NEXT_FRAME, 1, 0);
   mov_imm(&codebuf, ARM_IP, (u32)Helper_synchronized_exit);
   mov_reg(&codebuf, ARM_R0, Rthread);
   mov_reg(&codebuf, ARM_PC, ARM_IP);
 
-#define DEBUG_REGSET ((1<<ARM_R0)|(1<<ARM_R1)|(1<<ARM_R2)|(1<<ARM_R3)|(1<<ARM_IP))
-
-// DEBUG_METHODENTRY
-  handlers[H_DEBUG_METHODENTRY] = out_pos(&codebuf);
-  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(&codebuf, ARM_R2, ARM_R0);
-  mov_reg(&codebuf, ARM_R0, ARM_R8);
-  mov_reg(&codebuf, ARM_R1, ARM_R4);
-  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodEntry);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-// DEBUG_METHODEXIT
-  handlers[H_DEBUG_METHODEXIT] = out_pos(&codebuf);
-  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(&codebuf, ARM_R0, ARM_R8);
-  mov_reg(&codebuf, ARM_R1, ARM_R4);
-  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodExit);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
-// DEBUG_METHODCALL
-  handlers[H_DEBUG_METHODCALL] = out_pos(&codebuf);
-  stm(&codebuf, DEBUG_REGSET | (1<<ARM_LR), ARM_SP, PUSH_FD, 1);
-  mov_reg(&codebuf, ARM_R2, ARM_R0);
-  mov_reg(&codebuf, ARM_R0, ARM_R8);
-  mov_reg(&codebuf, ARM_R1, ARM_R4);
-  mov_imm(&codebuf, ARM_IP, (u32)Debug_MethodCall);
-  blx_reg(&codebuf, ARM_IP);
-  ldm(&codebuf, DEBUG_REGSET | (1<<ARM_PC), ARM_SP, POP_FD, 1);
-
 // EXIT_TO_INTERPRETER
 //   r0 = bci
   handlers[H_EXIT_TO_INTERPRETER] = out_pos(&codebuf);
-  ldr_imm(&codebuf, ARM_R1, Ristate, ISTATE_METHOD, 1, 0);
+  ldr_imm(&codebuf, ARM_R2, Rthread, THREAD_TOP_ZERO_FRAME, 1, 0);
+  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
+  ldr_imm(&codebuf, ARM_R1, ARM_R2, ISTATE_METHOD-ISTATE_NEXT_FRAME, 1, 0);
   ldr_imm(&codebuf, ARM_IP, ARM_R1, METHOD_CONSTMETHOD, 1, 0);
   add_reg(&codebuf, Rint_jpc, ARM_IP, ARM_R0);
-  mov_imm(&codebuf, ARM_R3, (u32)Thumb2_Exit_To_Interpreter);
   mov_reg(&codebuf, ARM_PC, ARM_R3);
 
   Thumb2_Clear_Cache(cb->hp, cb->hp + codebuf.idx * 2);
@@ -7453,4 +6880,4 @@
   thumb2_codebuf = cb;
 }
 
-#endif // THUMB2EE
+#endif // T2JIT



More information about the distro-pkg-dev mailing list