[aarch64-port-dev ] AArch64 Simulator: Improve multi-threaded performance

Andrew Haley aph at redhat.com
Thu Dec 12 04:22:52 PST 2013


When we're running multi-threaded with breakpoints, the system slows
to a crawl.

The reason for this is interesting: we have a shared breakpoint list,
and threads are contending for the lock around it.  One fairly obvious
solution for this is a reader-writer lock, but that doesn't help as much
as you might think because the contention is then for the lock itself.
Threads tend to pile up, all blocked on the lock.  With a dozen or so
threads running we spend more than half our time in the kernel
acquiring and freeing it.

My solution is to reduce the frequency with which we try to acquire
the lock by only locking if we have a reasonable expectation that we
might need to.  One way to do this is a Bloom filter keyed on the
program counter value.  A little experimentation has shown that a
filter with 512 elements reduces contention for the lock to a small
percentage of total CPU time, given a modest number of breakpoints.

I haven't attempted to do anything for bytecode breakpoints.  They
could be made much faster simply by only checking for a bytecode break
on notify instructions.

Andrew.


# HG changeset patch
# User aph
# Date 1386850852 0
# Node ID a2fafeaf271edb73b94e3ea0054fe8a35f64a39d
# Parent  3caf9bc3bb7f7b7bbcd68dba97ef60011c97b03a
Improve multi-threaded breakpoint performance

diff -r 3caf9bc3bb7f -r a2fafeaf271e cachememory.cpp
--- a/cachememory.cpp	Tue Dec 03 15:00:29 2013 +0000
+++ b/cachememory.cpp	Thu Dec 12 12:20:52 2013 +0000
@@ -41,21 +41,21 @@

 static char *dotrace = getenv("simcachetrace");

-#define trace(op, size, addr, cacheidx, datum, lineidx, format)		\
-  if (dotrace) {							\
-    char pad[32];							\
-    char pad2[32];							\
-    char pad3[32];							\
-    sprintf(pad, "0x%lx", addr);					\
-    sprintf(pad2, "0x" #format, data[cacheidx].datum[lineidx]);		\
-    sprintf(pad3, "0x%x", cacheidx);					\
-    debugState->lockBreak();						\
-    cout << debugState->prefix						\
-	 << # op " " # size						\
-         << " " << pad << " : data[" << pad3				\
-         << "]." # datum "[" << lineidx					\
-         << "] = " << pad2 << endl;					\
-    debugState->unlockBreak();						\
+#define trace(op, size, addr, cacheidx, datum, lineidx, format)	\
+  if (dotrace) {						\
+    char pad[32];						\
+    char pad2[32];						\
+    char pad3[32];						\
+    sprintf(pad, "0x%lx", addr);				\
+    sprintf(pad2, "0x" #format, data[cacheidx].datum[lineidx]);	\
+    sprintf(pad3, "0x%x", cacheidx);				\
+    debugState->lockBreak_write();				\
+    cout << debugState->prefix					\
+	 << # op " " # size					\
+         << " " << pad << " : data[" << pad3			\
+         << "]." # datum "[" << lineidx				\
+         << "] = " << pad2 << endl;				\
+    debugState->unlockBreak();					\
   }

 // compute the index of the cache line for a given address
diff -r 3caf9bc3bb7f -r a2fafeaf271e debug.cpp
--- a/debug.cpp	Tue Dec 03 15:00:29 2013 +0000
+++ b/debug.cpp	Thu Dec 12 12:20:52 2013 +0000
@@ -58,6 +58,7 @@
 int DebugState::breakMax = 10;
 Break **DebugState::breakList = new Break*[breakMax];
 int DebugState::loaded = 0;
+BloomFilter DebugState::breakpoint_filter;

 extern "C" void db() {
   enabled = !enabled;
@@ -70,11 +71,11 @@
   bcStepBreak = new BytecodeBreak("", 0);
   nextBreak = 0;
   disassembler = 0;
-  bcCheck = 0;
   nextBreak = NULL;
+
   // load any persisted debug state
+  load();

-  load();
   // count 1 means we stop at the first instruction
   stepCount = (stopNew ? 1 : 0);
   bcStepCount = 0;
@@ -96,14 +97,14 @@
   // cache current method and set bc index to -1
   cacheMethod(pc, fp);
   // maybe trace method entry
-  lockBreak();
   if (traceBC && method[0]) {
+    lockBreak_write();
     cout << prefix
 	 << "  ENTER:    "
 	 << method
 	 << endl;
+    unlockBreak();
   }
-  unlockBreak();
 }
 void DebugState::notifyMethodReentry(u_int64_t pc, u_int64_t fp)
 {
@@ -113,7 +114,7 @@
   clearHitIdx();
   // maybe trace method reentry
   if (traceBC && method[0]) {
-    lockBreak();
+    lockBreak_write();
     cout << prefix
 	 << "  RE-ENTER: "
 	 << method
@@ -125,7 +126,7 @@
 void DebugState::notifyMethodExit()
 {
   if (traceBC && method[0]) {
-    lockBreak();
+    lockBreak_write();
     cout << prefix
 	 << "  EXIT:"
 	 << endl;
@@ -148,7 +149,7 @@
     stepCount = 1;
   }
   if (traceBC > 1 && method[0]) {
-    lockBreak();
+    lockBreak_write();
     cout << prefix
 	 << "  BYTECODE: "
 	 << bcdecode;
@@ -162,7 +163,8 @@

 void DebugState::notifyCompile(char *name, unsigned char *base)
 {
-  lockBreak();
+  lockBreak_write();
+  bool change = false;
   strncpy(compiledMethod, name, sizeof(compiledMethod) - 1);
   compiledBase = base;
   compiled.insert(name, base);
@@ -172,15 +174,19 @@
       RelocBreak *relocbr = (RelocBreak*)br;
       if (!strcmp(name, relocbr->breakMethod)) {
 	relocbr->rebase(base);
+	change = true;
       }
     }
   }
+  if (change)
+    breakpointsChanged();
   unlockBreak();
 }

 void DebugState::notifyRelocate(unsigned char *base, long byteOffset)
 {
-  lockBreak();
+  lockBreak_write();
+  bool change = false;
   char *name = compiled.relocate(base, byteOffset);
   if (name) {
     for (int i = 0; i < breakCount; i++) {
@@ -189,10 +195,13 @@
 	RelocBreak *relocbr = (RelocBreak*)br;
 	if (!strcmp(name, relocbr->breakMethod)) {
 	  relocbr->rebase(base + byteOffset);
+	  change = true;
 	}
       }
     }
   }
+  if (change)
+    breakpointsChanged();
   unlockBreak();
 }

@@ -272,7 +281,7 @@
     strcpy(method, save->method);
     bcidx = save->bcidx;
     if (traceBC) {
-      lockBreak();
+      lockBreak_write();
       cout << prefix
 	   << "  RE-ENTER: "
 	   << method
@@ -292,6 +301,21 @@

 // break point methods

+void DebugState::breakpointsChanged() {
+  BloomFilter filter;
+  if (bcCheck) {
+    // If we're tracing bytecodes, all PC values may trap.
+    filter.set_all();
+  } else {
+    for (int i = 0; i < breakCount; i++) {
+      u_int64_t pc = breakList[i]->PC();
+      if (pc) {
+	filter.set(pc);
+      }
+    }
+  }
+  breakpoint_filter = filter;
+}

 int InstrBreak::isBreak(AArch64Simulator *sim)
 {
@@ -589,7 +613,7 @@

   if (debugState.nextBreak && debugState.nextBreak->isBreak(this)) {
     debugState.atBreak = debugState.nextBreak;
-    debugState.lockBreak();
+    debugState.lockBreak_read();
     return true;
   }

@@ -598,12 +622,12 @@
       // mark with a default instruction break
       debugState.atBreak = debugState.stepBreak;
     }
-    debugState.lockBreak();
+    debugState.lockBreak_read();
     return true;
   }

   if (enabled && debugState.breakCount != 0) {
-    debugState.lockBreak();
+    debugState.lockBreak_read();
     if (isUserBreak()) {
       return true;
     }
@@ -629,9 +653,42 @@
   return false;
 }

+#define READER_WRITER_LOCKS
+
+
+#ifdef READER_WRITER_LOCKS
+static pthread_rwlock_t breakLock;
+
+class breakLock_init_t {
+public:
+  breakLock_init_t() { pthread_rwlock_init(&breakLock, NULL); }
+};
+breakLock_init_t breakLock_init;
+
+void DebugState::lockBreak_read()
+{
+  pthread_rwlock_rdlock(&breakLock);
+}
+
+void DebugState::lockBreak_write()
+{
+  pthread_rwlock_wrlock(&breakLock);
+}
+
+void DebugState::unlockBreak()
+{
+  pthread_rwlock_unlock(&breakLock);
+}
+
+#else
 static pthread_mutex_t breakLock = PTHREAD_MUTEX_INITIALIZER;

-void DebugState::lockBreak()
+void DebugState::lockBreak_read()
+{
+  pthread_mutex_lock(&breakLock);
+}
+
+void DebugState::lockBreak_write()
 {
   pthread_mutex_lock(&breakLock);
 }
@@ -640,6 +697,7 @@
 {
   pthread_mutex_unlock(&breakLock);
 }
+#endif

 void AArch64Simulator::setInstrBreak(char *pc)
 {
@@ -1004,7 +1062,7 @@
 {
   // we need to take the break lock when tracing as the dasm code is
   // not thread safe
-  debugState.lockBreak();
+  debugState.lockBreak_write();
   (cout << debugState.prefix).flush();
   if (debugState.disassembler) {
     (*debugState.disassembler)(cpuState.pc);
@@ -1360,7 +1418,7 @@

 void DebugState::load()
 {
-  lockBreak();
+  lockBreak_write();
   if (!loaded) {
     FILE *file = fopen(rcfilename(), "r");
     if (file != 0) {
@@ -1501,11 +1559,13 @@
     }
     loaded = 1;
   }
+  breakpointsChanged();
   unlockBreak();
 }

 void DebugState::persist()
 {
+  breakpointsChanged();
   // do not lock as this will happen in the debugger when we are
   // already locked
   FILE *file = fopen(rcfilename(), "w");
diff -r 3caf9bc3bb7f -r a2fafeaf271e debug.hpp
--- a/debug.hpp	Tue Dec 03 15:00:29 2013 +0000
+++ b/debug.hpp	Thu Dec 12 12:20:52 2013 +0000
@@ -36,6 +36,48 @@
 #include <sys/types.h>
 #include "compileindex.hpp"

+class BloomFilter {
+
+#define length 512
+
+  u_int64_t t[length/64];
+
+public:
+
+  unsigned int hash(u_int64_t d) {
+    return ((d >> 2) ^ (d >> 8)) % length;
+  }
+
+  void set(u_int64_t d) {
+    unsigned int h = hash(d);
+    unsigned int n = h / 64u;
+    unsigned int b = h % 64u;
+    t[n] |= (u_int64_t)1 << b;
+  }
+
+  bool test(u_int64_t d) {
+    unsigned int h = hash(d);
+    unsigned int n = h / 64u;
+    unsigned int b = h % 64u;
+    return ((u_int64_t)1 << b) & t[n];
+  }
+
+  void set_all() {
+    memset(t, ~0, sizeof t);
+  }
+
+  void clear() {
+    memset(t, 0, sizeof t);
+  }
+
+  BloomFilter() {
+    clear();
+  }
+
+#undef length
+} __attribute((aligned(64)));
+
+
 class AArch64Simulator;

 typedef void (*DisassFn)(u_int64_t);
@@ -63,6 +105,7 @@
   bool isDisabled() { return disabled; }
   void enable() { disabled = false; }
   void disable() { disabled = true; }
+  virtual u_int64_t PC() = 0;
 };

 class InstrBreak : public Break
@@ -77,6 +120,7 @@
   int isBreak(AArch64Simulator *sim);
   void printTo(char *buf, bool isCanonical = false);
   bool equals(Break *b);
+  virtual u_int64_t PC() { return isDisabled() ? 0 : breakPC; }
 };

 class BytecodeBreak : public Break
@@ -95,6 +139,7 @@
   int isBreak(AArch64Simulator *sim);
   void printTo(char *buf, bool isCanonical = false);
   bool equals(Break *b);
+  virtual u_int64_t PC() { return 0; }
 };

 class NextInstrBreak : public Break
@@ -108,6 +153,7 @@
   int isBreak(AArch64Simulator *sim);
   void printTo(char *buf, bool isCanonical = false);
   bool equals(Break *b);
+  virtual u_int64_t PC() { return 0; }
 };

 class NextBytecodeBreak : public Break
@@ -123,6 +169,7 @@
   int isBreak(AArch64Simulator *sim);
   void printTo(char *buf, bool isCanonical = false);
   bool equals(Break *b);
+  virtual u_int64_t PC() { return 0; }
 };

 class RelocBreak : public Break
@@ -137,6 +184,7 @@
   void printTo(char *buf, bool isCanonical = false);
   bool equals(Break *b);
   void rebase(unsigned char *base);
+  virtual u_int64_t PC() { return isDisabled() ? 0 : breakPC; }
 };

 // forward declaration for DebugStateSave
@@ -152,6 +200,8 @@
   static int breakCount;
   static int breakMax;
   static Break **breakList;
+  static BloomFilter breakpoint_filter;
+
   Break *atBreak;
   Break *bcStepBreak;
   Break *stepBreak;
@@ -184,6 +234,12 @@

   DebugState();

+  bool maybeBreakpoint(u_int64_t pc) {
+    return stepCount | trace
+      || breakpoint_filter.test(pc);
+  }
+
+  void breakpointsChanged();
   void notifyMethodEntry(u_int64_t pc, u_int64_t fp = 0);
   void notifyMethodReentry(u_int64_t pc, u_int64_t fp = 0);
   void notifyMethodExit();
@@ -201,7 +257,8 @@
   void reentrySave(DebugStateSave *save);
   void reentryRestore(DebugStateSave *save);

-  void lockBreak();
+  void lockBreak_read();
+  void lockBreak_write();
   void unlockBreak();

   void addBreak(Break *);
diff -r 3caf9bc3bb7f -r a2fafeaf271e simulator.cpp
--- a/simulator.cpp	Tue Dec 03 15:00:29 2013 +0000
+++ b/simulator.cpp	Thu Dec 12 12:20:52 2013 +0000
@@ -81,7 +81,7 @@
 AArch64Simulator::AArch64Simulator(Memory *m) : memory(m)
 {
   // set up callbacks into JVM
-  initCallbacks(false);
+  initCallbacks(true);
 }

 // Debugging support
@@ -99,6 +99,7 @@
   } else {
     debugState.bcCheck = 0;
   }
+  debugState.breakpointsChanged();
 }

 #define TST( _flag ) (cpuState.testCPSR( _flag ))
@@ -197,7 +198,11 @@

 AArch64Simulator *AArch64Simulator::current()
 {
-  return get_current(false, false);
+  bool buffered = false;
+  bool skipBCCheck = true;  // If we create a new simulator in this
+			    // thread, don't turn skipBCCheck on.
+
+  return get_current(buffered, skipBCCheck);
 }

 extern "C" void stop();
@@ -607,9 +612,9 @@
 AArch64Simulator::status_t AArch64Simulator::decodeAndExecute(u_int64_t pc)
 {
   // we need to check if gdb wants an in here
-
-  if (debugState.stepCount|debugState.breakCount|debugState.trace)
+  if (debugState.maybeBreakpoint(pc)) {
     checkBreak();
+  }

   u_int64_t group = dispatchGroup(instr);

@@ -9246,6 +9251,7 @@
     errorCode = ERROR_NYI;
     return error();
   }
+  return error();
 }

 AArch64Simulator::status_t AArch64Simulator::dexAdvSIMDShiftByImm()



More information about the aarch64-port-dev mailing list