Support for exception throwing.

These changes start to add support for a long-jump style of exception
throw. A Context is added to build up the register values that the long
jump will load, gathered from the callee saves on the stack. Throws are
reworked slightly to report the PC of the frame of the method being
looked at, rather than the return PC (which previously left the trace's
PC off by a frame). Callee-save support is added to the JNI compiler,
which then no longer needs to spill incoming argument registers, as it
can reuse the callee saves instead.

Currently the code is lightly tested on ARM and doesn't support
restoring floating point callee save registers.

Also clean up some PIC TODOs.
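
For orientation, a minimal sketch of how the new Context is meant to be
driven when delivering a throw. The frame walk and handler lookup are
elided and the surrounding names (frame, handler_sp, handler_pc) are
illustrative; only the Context calls match the API added in src/context.h:

    Context* context = Context::Create();
    // For each managed frame unwound, gather its saved callee-save values
    // so that registers are correct when control lands in the handler:
    context->FillCalleeSaves(frame);  // repeated per frame walked
    context->SetSP(handler_sp);       // SP of the catching frame
    context->SetPC(handler_pc);       // e.g. Method::ToNativePC(handler dex pc)
    context->DoLongJump();            // loads the registers; never returns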

Change-Id: I9bcef4ab3bf4a9de57d7a5123fb3bb1707ca8921
diff --git a/build/Android.common.mk b/build/Android.common.mk
index c62b038..c22deed 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -64,6 +64,7 @@
 	src/calling_convention.cc \
 	src/calling_convention_arm.cc \
 	src/calling_convention_x86.cc \
+	src/context.cc \
 	src/check_jni.cc \
 	src/class_linker.cc \
 	src/class_loader.cc \
@@ -135,6 +136,7 @@
 
 LIBART_TARGET_SRC_FILES := \
 	$(LIBART_COMMON_SRC_FILES) \
+	src/context_arm.cc.arm \
 	src/logging_android.cc \
 	src/runtime_android.cc \
 	src/thread_android.cc \
@@ -142,6 +144,7 @@
 
 LIBART_HOST_SRC_FILES := \
 	$(LIBART_COMMON_SRC_FILES) \
+	src/context_x86.cc \
 	src/logging_linux.cc \
 	src/runtime_linux.cc \
 	src/thread_linux.cc \
diff --git a/src/assembler.h b/src/assembler.h
index 173609e..4d085e0 100644
--- a/src/assembler.h
+++ b/src/assembler.h
@@ -302,15 +302,11 @@
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& spill_regs) = 0;
+                          const std::vector<ManagedRegister>& callee_save_regs) = 0;
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& spill_regs) = 0;
-
-  // Fill list of registers from spill area
-  virtual void FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
-                                 size_t displacement) = 0;
+                           const std::vector<ManagedRegister>& callee_save_regs) = 0;
 
   virtual void IncreaseFrameSize(size_t adjust) = 0;
   virtual void DecreaseFrameSize(size_t adjust) = 0;
@@ -395,6 +391,7 @@
                     ManagedRegister scratch) = 0;
   virtual void Call(FrameOffset base, Offset offset,
                     ManagedRegister scratch) = 0;
+  virtual void Call(ThreadOffset offset, ManagedRegister scratch) = 0;
 
   // Generate code to check if Thread::Current()->suspend_count_ is non-zero
   // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
diff --git a/src/assembler_arm.cc b/src/assembler_arm.cc
index d0fae17..c4dbbba 100644
--- a/src/assembler_arm.cc
+++ b/src/assembler_arm.cc
@@ -1414,46 +1414,55 @@
 }
 
 void ArmAssembler::BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                              const std::vector<ManagedRegister>& spill_regs) {
+                              const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK(IsAligned(frame_size, kStackAlignment));
   CHECK_EQ(R0, method_reg.AsArm().AsCoreRegister());
-  AddConstant(SP, -frame_size);
-  RegList spill_list = 1 << R0 | 1 << LR;
-  for (size_t i = 0; i < spill_regs.size(); i++) {
-    Register reg = spill_regs.at(i).AsArm().AsCoreRegister();
-    // check assumption LR is the last register that gets spilled
-    CHECK_LT(reg, LR);
-    spill_list |= 1 << reg;
+
+  // Push callee saves and link register
+  RegList push_list = 1 << LR;
+  size_t pushed_values = 1;
+  for (size_t i = 0; i < callee_save_regs.size(); i++) {
+    Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister();
+    push_list |= 1 << reg;
+    pushed_values++;
   }
-  // Store spill list from (low to high number register) starting at SP
-  // incrementing after each store but not updating SP
-  stm(IA, SP, spill_list, AL);
+  PushList(push_list);
+
+  // Increase frame to required size
+  CHECK_GT(frame_size, pushed_values * kPointerSize);  // Must at least have space for the Method*
+  size_t adjust = frame_size - (pushed_values * kPointerSize);
+  IncreaseFrameSize(adjust);
+
+  // Write out Method*
+  StoreToOffset(kStoreWord, R0, SP, 0);
 }
 
 void ArmAssembler::RemoveFrame(size_t frame_size,
-                              const std::vector<ManagedRegister>& spill_regs) {
+                              const std::vector<ManagedRegister>& callee_save_regs) {
   CHECK(IsAligned(frame_size, kStackAlignment));
-  // Reload LR. TODO: reload any saved callee saves from spill_regs
-  LoadFromOffset(kLoadWord, LR, SP, (spill_regs.size() + 1) * kPointerSize);
-  AddConstant(SP, frame_size);
-  mov(PC, ShifterOperand(LR));
-}
-
-void ArmAssembler::FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
-                                     size_t displacement) {
-  for(size_t i = 0; i < spill_regs.size(); i++) {
-    Register reg = spill_regs.at(i).AsArm().AsCoreRegister();
-    LoadFromOffset(kLoadWord, reg, SP, displacement + ((i + 1) * kPointerSize));
+  // Compute callee saves to pop and PC
+  RegList pop_list = 1 << PC;
+  size_t pop_values = 1;
+  for (size_t i = 0; i < callee_save_regs.size(); i++) {
+    Register reg = callee_save_regs.at(i).AsArm().AsCoreRegister();
+    pop_list |= 1 << reg;
+    pop_values++;
   }
+
+  // Decrease frame to start of callee saves
+  CHECK_GT(frame_size, pop_values * kPointerSize);
+  size_t adjust = frame_size - (pop_values * kPointerSize);
+  DecreaseFrameSize(adjust);
+
+  // Pop callee saves and PC
+  PopList(pop_list);
 }
 
 void ArmAssembler::IncreaseFrameSize(size_t adjust) {
-  CHECK(IsAligned(adjust, kStackAlignment));
   AddConstant(SP, -adjust);
 }
 
 void ArmAssembler::DecreaseFrameSize(size_t adjust) {
-  CHECK(IsAligned(adjust, kStackAlignment));
   AddConstant(SP, adjust);
 }
 
@@ -1752,6 +1761,10 @@
   // TODO: place reference map on call
 }
 
+void ArmAssembler::Call(ThreadOffset offset, ManagedRegister scratch) {
+  UNIMPLEMENTED(FATAL);
+}
+
 void ArmAssembler::GetCurrentThread(ManagedRegister tr) {
   mov(tr.AsArm().AsCoreRegister(), ShifterOperand(TR));
 }
diff --git a/src/assembler_arm.h b/src/assembler_arm.h
index cf031c7..e58eb92 100644
--- a/src/assembler_arm.h
+++ b/src/assembler_arm.h
@@ -415,7 +415,6 @@
   // Emit data (e.g. encoded instruction or immediate) to the
   // instruction stream.
   void Emit(int32_t value);
-
   void Bind(Label* label);
 
   //
@@ -424,15 +423,11 @@
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& spill_regs);
+                          const std::vector<ManagedRegister>& callee_save_regs);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& spill_regs);
-
-  // Fill list of registers from spill area
-  virtual void FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
-                                 size_t displacement);
+                           const std::vector<ManagedRegister>& callee_save_regs);
 
   virtual void IncreaseFrameSize(size_t adjust);
   virtual void DecreaseFrameSize(size_t adjust);
@@ -441,8 +436,6 @@
   virtual void Store(FrameOffset offs, ManagedRegister src, size_t size);
   virtual void StoreRef(FrameOffset dest, ManagedRegister src);
   virtual void StoreRawPtr(FrameOffset dest, ManagedRegister src);
-  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
-                             FrameOffset in_off, ManagedRegister scratch);
 
   virtual void StoreImmediateToFrame(FrameOffset dest, uint32_t imm,
                                      ManagedRegister scratch);
@@ -456,6 +449,9 @@
 
   virtual void StoreStackPointerToThread(ThreadOffset thr_offs);
 
+  virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
+                             FrameOffset in_off, ManagedRegister scratch);
+
   // Load routines
   virtual void Load(ManagedRegister dest, FrameOffset src, size_t size);
 
@@ -516,6 +512,7 @@
                     ManagedRegister scratch);
   virtual void Call(FrameOffset base, Offset offset,
                     ManagedRegister scratch);
+  virtual void Call(ThreadOffset offset, ManagedRegister scratch);
 
   // Generate code to check if Thread::Current()->suspend_count_ is non-zero
   // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
diff --git a/src/assembler_x86.cc b/src/assembler_x86.cc
index 45c0086..892bf76 100644
--- a/src/assembler_x86.cc
+++ b/src/assembler_x86.cc
@@ -134,6 +134,12 @@
   EmitImmediate(imm);
 }
 
+void X86Assembler::movl(const Address& dst, Label* lbl) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0xC7);
+  EmitOperand(0, dst);
+  EmitLabel(lbl, dst.length_ + 5);
+}
 
 void X86Assembler::movzxb(Register dst, ByteRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
@@ -1389,11 +1395,6 @@
   ret();
 }
 
-void X86Assembler::FillFromSpillArea(
-    const std::vector<ManagedRegister>& spill_regs, size_t displacement) {
-  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
-}
-
 void X86Assembler::IncreaseFrameSize(size_t adjust) {
   CHECK(IsAligned(adjust, kStackAlignment));
   addl(ESP, Immediate(-adjust));
@@ -1467,6 +1468,10 @@
   fs()->movl(Address::Absolute(thr_offs), ESP);
 }
 
+void X86Assembler::StoreLabelToThread(ThreadOffset thr_offs, Label* lbl) {
+  fs()->movl(Address::Absolute(thr_offs), lbl);
+}
+
 void X86Assembler::StoreSpanning(FrameOffset dest, ManagedRegister src,
                                  FrameOffset in_off, ManagedRegister scratch) {
   UNIMPLEMENTED(FATAL);  // this case only currently exists for ARM
@@ -1654,19 +1659,11 @@
 }
 
 void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister) {
-  // TODO: Needed for:
-  // JniCompilerTest.CompileAndRunIntObjectObjectMethod
-  // JniCompilerTest.CompileAndRunStaticIntObjectObjectMethod
-  // JniCompilerTest.CompileAndRunStaticSynchronizedIntObjectObjectMethod
-  // JniCompilerTest.ReturnGlobalRef
   UNIMPLEMENTED(FATAL);
 }
 
-// TODO: remove this generator of non-PIC code
-void X86Assembler::Call(uintptr_t addr, ManagedRegister mscratch) {
-  Register scratch = mscratch.AsX86().AsCpuRegister();
-  movl(scratch, Immediate(addr));
-  call(scratch);
+void X86Assembler::Call(ThreadOffset offset, ManagedRegister mscratch) {
+  fs()->call(Address::Absolute(offset));
 }
 
 void X86Assembler::GetCurrentThread(ManagedRegister tr) {
diff --git a/src/assembler_x86.h b/src/assembler_x86.h
index c893633..86069be 100644
--- a/src/assembler_x86.h
+++ b/src/assembler_x86.h
@@ -221,6 +221,7 @@
   void movl(Register dst, const Address& src);
   void movl(const Address& dst, Register src);
   void movl(const Address& dst, const Immediate& imm);
+  void movl(const Address& dst, Label* lbl);
 
   void movzxb(Register dst, ByteRegister src);
   void movzxb(Register dst, const Address& src);
@@ -444,15 +445,11 @@
 
   // Emit code that will create an activation on the stack
   virtual void BuildFrame(size_t frame_size, ManagedRegister method_reg,
-                          const std::vector<ManagedRegister>& spill_regs);
+                          const std::vector<ManagedRegister>& callee_save_regs);
 
   // Emit code that will remove an activation from the stack
   virtual void RemoveFrame(size_t frame_size,
-                           const std::vector<ManagedRegister>& spill_regs);
-
-  // Fill list of registers from spill area
-  virtual void FillFromSpillArea(const std::vector<ManagedRegister>& spill_regs,
-                                 size_t displacement);
+                           const std::vector<ManagedRegister>& callee_save_regs);
 
   virtual void IncreaseFrameSize(size_t adjust);
   virtual void DecreaseFrameSize(size_t adjust);
@@ -474,6 +471,8 @@
 
   virtual void StoreStackPointerToThread(ThreadOffset thr_offs);
 
+  void StoreLabelToThread(ThreadOffset thr_offs, Label* lbl);
+
   virtual void StoreSpanning(FrameOffset dest, ManagedRegister src,
                              FrameOffset in_off, ManagedRegister scratch);
 
@@ -537,7 +536,7 @@
                     ManagedRegister scratch);
   virtual void Call(FrameOffset base, Offset offset,
                     ManagedRegister scratch);
-  virtual void Call(uintptr_t addr, ManagedRegister scratch);
+  virtual void Call(ThreadOffset offset, ManagedRegister scratch);
 
   // Generate code to check if Thread::Current()->suspend_count_ is non-zero
   // and branch to a SuspendSlowPath if it is. The SuspendSlowPath will continue
diff --git a/src/calling_convention.h b/src/calling_convention.h
index dcca3bc..4464609 100644
--- a/src/calling_convention.h
+++ b/src/calling_convention.h
@@ -64,6 +64,13 @@
 };
 
 // Abstraction for managed code's calling conventions
+// | { Incoming stack args } |
+// | { Prior Method* }       | <-- Prior SP
+// | { Return address }      |
+// | { Callee saves }        |
+// | { Spills ... }          |
+// | { Outgoing stack args } |
+// | { Method* }             | <-- SP
 class ManagedRuntimeCallingConvention : public CallingConvention {
  public:
   static ManagedRuntimeCallingConvention* Create(Method* native_method,
@@ -90,21 +97,22 @@
 
  protected:
   explicit ManagedRuntimeCallingConvention(Method* method) :
-                                          CallingConvention(method) {}
+                                           CallingConvention(method) {}
 };
 
 // Abstraction for JNI calling conventions
-// | incoming stack args    | <-- Prior SP
-// | { Return address }     |     (x86)
-// | { Return value spill } |     (live on return slow paths)
-// | { Stack Indirect Ref.  |
-// |   Table...             |
-// |   num. refs./link }    |     (here to prior SP is frame size)
-// | { Spill area }         |     (ARM)
-// | Method*                | <-- Anchor SP written to thread
-// | { Outgoing stack args  |
-// |   ... }                | <-- SP at point of call
-// | Native frame           |
+// | { Incoming stack args }         | <-- Prior SP
+// | { Return address }              |
+// | { Callee saves }                |     ([1])
+// | { Return value spill }          |     (live on return slow paths)
+// | { Stack Indirect Ref. Table     |
+// |   num. refs./link }             |     (here to prior SP is frame size)
+// | { Method* }                     | <-- Anchor SP written to thread
+// | { Outgoing stack args }         | <-- SP at point of call
+// | Native frame                    |
+//
+// [1] We must save all callee saves here so that an exception throw can restore
+// the callee saves of frames above this one.
 class JniCallingConvention : public CallingConvention {
  public:
   static JniCallingConvention* Create(Method* native_method,
@@ -118,19 +126,18 @@
   virtual size_t ReturnPcOffset() = 0;
   // Size of outgoing arguments, including alignment
   virtual size_t OutArgSize() = 0;
-  // Size of area used to hold spilled registers
-  virtual size_t SpillAreaSize() = 0;
   // Number of references in stack indirect reference table
   size_t ReferenceCount();
   // Location where the return value of a call can be squirreled if another
   // call is made following the native call
   FrameOffset ReturnValueSaveLocation();
 
-  // Registers that must be spilled (due to clobbering) before the call into
-  // the native routine
-  const std::vector<ManagedRegister>& RegsToSpillPreCall() {
-    return spill_regs_;
-  }
+  // Callee save registers to spill prior to native code (which may clobber)
+  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const = 0;
+
+  // Spill mask values
+  virtual uint32_t CoreSpillMask() const = 0;
+  virtual uint32_t FpSpillMask() const = 0;
 
   // Returns true if the register will be clobbered by an outgoing
   // argument value.
@@ -152,7 +159,6 @@
   // Position of SIRT and interior fields
   FrameOffset SirtOffset() {
     return FrameOffset(displacement_.Int32Value() +
-                       SpillAreaSize() +
                        kPointerSize);  // above Method*
   }
   FrameOffset SirtNumRefsOffset() {
@@ -182,9 +188,6 @@
 
  protected:
   static size_t NumberOfExtraArgumentsForJni(Method* method);
-
-  // Extra registers to spill before the call into native
-  std::vector<ManagedRegister> spill_regs_;
 };
 
 }  // namespace art
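
A note on how the frames above are crawled: the Method* slot at SP is the
anchor. Each Method* records its frame size and return-pc offset (see
SetFrameSizeInBytes and SetReturnPcOffsetInBytes elsewhere in this
change), so a walker can step from a frame's SP to its return address and
on to the caller's frame, as the fake frames in exception_test.cc
illustrate.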
diff --git a/src/calling_convention_arm.cc b/src/calling_convention_arm.cc
index 4b4a146..f8e8f3d 100644
--- a/src/calling_convention_arm.cc
+++ b/src/calling_convention_arm.cc
@@ -99,20 +99,19 @@
 
 // JNI calling convention
 
-ArmJniCallingConvention::ArmJniCallingConvention(Method* method) :
-    JniCallingConvention(method) {
-  // A synchronized method will call monitor enter clobbering R1, R2 and R3
-  // unless they are spilled.
-  if (method->IsSynchronized()) {
-    spill_regs_.push_back(ArmManagedRegister::FromCoreRegister(R1));
-    spill_regs_.push_back(ArmManagedRegister::FromCoreRegister(R2));
-    spill_regs_.push_back(ArmManagedRegister::FromCoreRegister(R3));
+ArmJniCallingConvention::ArmJniCallingConvention(Method* method) : JniCallingConvention(method) {
+  for (int i = R4; i < R12; i++) {
+    callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(static_cast<Register>(i)));
   }
+  // TODO: VFP
+  // for (SRegister i = S16; i <= S31; i++) {
+  //  callee_save_regs_.push_back(ArmManagedRegister::FromSRegister(i));
+  // }
 }
 
 size_t ArmJniCallingConvention::FrameSize() {
-  // Method* and spill area size
-  size_t frame_data_size = kPointerSize + SpillAreaSize();
+  // Method*, LR and callee save area size
+  size_t frame_data_size = (2 + CalleeSaveRegisters().size()) * kPointerSize;
   // References plus 2 words for SIRT header
   size_t sirt_size = (ReferenceCount() + 2) * kPointerSize;
   // Plus return value spill area size
@@ -135,15 +134,8 @@
 }
 
 size_t ArmJniCallingConvention::ReturnPcOffset() {
-  // Link register is always the last value spilled, skip forward one word for
-  // the Method* then skip back one word to get the link register (ie +0)
-  return SpillAreaSize();
-}
-
-size_t ArmJniCallingConvention::SpillAreaSize() {
-  // Space for link register. For synchronized methods we need enough space to
-  // save R1, R2 and R3 (R0 is the method register and always preserved)
-  return GetMethod()->IsSynchronized() ? (4 * kPointerSize) : kPointerSize;
+  // Link register is always the first value pushed when the frame is constructed
+  return FrameSize() - kPointerSize;
 }
 
 // Will reg be crushed by an outgoing argument?
@@ -164,7 +156,7 @@
   int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni(method);
   if ((itr_args_ >= 2) && method->IsParamALongOrDouble(arg_pos)) {
     // itr_slots_ needs to be an even number, according to AAPCS.
-    if (itr_slots_ & 0x1u) {
+    if ((itr_slots_ & 0x1u) != 0) {
       itr_slots_++;
     }
   }
diff --git a/src/calling_convention_arm.h b/src/calling_convention_arm.h
index e9c8ab0..8e5d4c2 100644
--- a/src/calling_convention_arm.h
+++ b/src/calling_convention_arm.h
@@ -38,7 +38,15 @@
   virtual size_t FrameSize();
   virtual size_t ReturnPcOffset();
   virtual size_t OutArgSize();
-  virtual size_t SpillAreaSize();
+  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
+    return callee_save_regs_;
+  }
+  virtual uint32_t CoreSpillMask() const {
+    return 0x0FF0;  // R4 to R11
+  }
+  virtual uint32_t FpSpillMask() const {
+    return 0;
+  }
   virtual bool IsOutArgRegister(ManagedRegister reg);
   virtual bool IsCurrentParamInRegister();
   virtual bool IsCurrentParamOnStack();
@@ -49,6 +57,9 @@
   virtual size_t NumberOfOutgoingStackArgs();
 
  private:
+  // TODO: these values aren't unique and can be shared amongst instances
+  std::vector<ManagedRegister> callee_save_regs_;
+
   DISALLOW_COPY_AND_ASSIGN(ArmJniCallingConvention);
 };
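
For the masks above: 0x0FF0 is binary 0000 1111 1111 0000, i.e. bits 4
through 11, matching the R4-R11 callee saves collected in
calling_convention_arm.cc. R12 (IP) is a caller-save scratch register
under AAPCS, and LR is handled separately by BuildFrame.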
 
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index 6cc556a..af464af 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -65,6 +65,8 @@
 
 // JNI calling convention
 
+std::vector<ManagedRegister> X86JniCallingConvention::callee_save_regs_;
+
 size_t X86JniCallingConvention::FrameSize() {
   // Return address and Method*
   size_t frame_data_size = 2 * kPointerSize;
@@ -84,12 +86,6 @@
   return FrameSize() - kPointerSize;
 }
 
-
-size_t X86JniCallingConvention::SpillAreaSize() {
-  // No spills, return address was pushed at the top of the frame
-  return 0;
-}
-
 bool X86JniCallingConvention::IsOutArgRegister(ManagedRegister) {
   return false;  // Everything is passed by stack
 }
@@ -117,7 +113,7 @@
   // regular argument parameters and this
   size_t param_args = GetMethod()->NumArgs() +
                       GetMethod()->NumLongOrDoubleArgs();
-  return static_args + param_args + 1;  // count JNIEnv*
+  return static_args + param_args + 2;  // count JNIEnv* and return pc (pushed after Method*)
 }
 
 }  // namespace x86
diff --git a/src/calling_convention_x86.h b/src/calling_convention_x86.h
index d8dda57..8230754 100644
--- a/src/calling_convention_x86.h
+++ b/src/calling_convention_x86.h
@@ -39,7 +39,16 @@
   virtual size_t FrameSize();
   virtual size_t ReturnPcOffset();
   virtual size_t OutArgSize();
-  virtual size_t SpillAreaSize();
+  virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
+    DCHECK(callee_save_regs_.empty());
+    return callee_save_regs_;
+  }
+  virtual uint32_t CoreSpillMask() const {
+    return 0;
+  }
+  virtual uint32_t FpSpillMask() const {
+    return 0;
+  }
   virtual bool IsOutArgRegister(ManagedRegister reg);
   virtual bool IsCurrentParamInRegister();
   virtual bool IsCurrentParamOnStack();
@@ -50,6 +59,8 @@
   virtual size_t NumberOfOutgoingStackArgs();
 
  private:
+  static std::vector<ManagedRegister> callee_save_regs_;
+
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
 
diff --git a/src/class_linker.h b/src/class_linker.h
index d0d3827..108059c 100644
--- a/src/class_linker.h
+++ b/src/class_linker.h
@@ -183,7 +183,12 @@
   Class* AllocClass(size_t class_size);
   DexCache* AllocDexCache(const DexFile& dex_file);
   Field* AllocField();
+
+  // TODO: have no friends, we need this currently to create a special method
+  // to describe callee save registers for throwing exceptions
+  friend class Thread;
   Method* AllocMethod();
+
   CodeAndDirectMethods* AllocCodeAndDirectMethods(size_t length);
   InterfaceEntry* AllocInterfaceEntry(Class* interface);
 
diff --git a/src/compiler.cc b/src/compiler.cc
index 0d583acc..b88fd5a 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -14,29 +14,20 @@
 
 namespace art {
 
-typedef void (*ThrowAme)(Method*, Thread*);
-
-void ThrowAbstractMethodError(Method* method, Thread* thread) {
-  LOG(FATAL) << "Unimplemented Exception Handling. Remove this when ThrowException works.";
-  thread->ThrowNewException("Ljava/lang/AbstractMethodError",
-                            "abstract method \"%s\"",
-                            PrettyMethod(method).c_str());
-}
-
 namespace arm {
-  ByteArray* CreateAbstractMethodErrorStub(ThrowAme);
+  ByteArray* CreateAbstractMethodErrorStub();
 }
 
 namespace x86 {
-  ByteArray* CreateAbstractMethodErrorStub(ThrowAme);
+  ByteArray* CreateAbstractMethodErrorStub();
 }
 
 Compiler::Compiler(InstructionSet insns) : instruction_set_(insns), jni_compiler_(insns),
     verbose_(false) {
   if (insns == kArm || insns == kThumb2) {
-    abstract_method_error_stub_ = arm::CreateAbstractMethodErrorStub(&ThrowAbstractMethodError);
+    abstract_method_error_stub_ = arm::CreateAbstractMethodErrorStub();
   } else if (insns == kX86) {
-    abstract_method_error_stub_ = x86::CreateAbstractMethodErrorStub(&ThrowAbstractMethodError);
+    abstract_method_error_stub_ = x86::CreateAbstractMethodErrorStub();
   }
 }
 
@@ -192,7 +183,10 @@
   if (method->IsNative()) {
     jni_compiler_.Compile(method);
     // unregister will install the stub to lookup via dlsym
-    method->UnregisterNative();
+    // TODO: this is only necessary for tests
+    if (!method->IsRegistered()) {
+      method->UnregisterNative();
+    }
   } else if (method->IsAbstract()) {
     DCHECK(abstract_method_error_stub_ != NULL);
     if (instruction_set_ == kX86) {
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index 9e6617e..fdcce9c 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -876,19 +876,19 @@
     }
 
     art::ByteArray* managed_code =
-        art::ByteArray::Alloc(cUnit.codeBuffer.size() *
-        sizeof(cUnit.codeBuffer[0]));
+        art::ByteArray::Alloc(cUnit.codeBuffer.size() * sizeof(cUnit.codeBuffer[0]));
     memcpy(managed_code->GetData(),
            reinterpret_cast<const int8_t*>(&cUnit.codeBuffer[0]),
            managed_code->GetLength());
-    art::ByteArray* mapping_table =
-        art::ByteArray::Alloc(cUnit.mappingTable.size() *
-        sizeof(cUnit.mappingTable[0]));
+    art::IntArray* mapping_table =
+        art::IntArray::Alloc(cUnit.mappingTable.size());
+    DCHECK_EQ(mapping_table->GetClass()->GetComponentSize(), sizeof(cUnit.mappingTable[0]));
     memcpy(mapping_table->GetData(),
-           reinterpret_cast<const int8_t*>(&cUnit.mappingTable[0]),
-           mapping_table->GetLength());
+           reinterpret_cast<const int32_t*>(&cUnit.mappingTable[0]),
+           mapping_table->GetLength() * sizeof(cUnit.mappingTable[0]));
     method->SetCode(managed_code, art::kThumb2, mapping_table);
     method->SetFrameSizeInBytes(cUnit.frameSize);
+    method->SetReturnPcOffsetInBytes(cUnit.frameSize - sizeof(intptr_t));
     method->SetCoreSpillMask(cUnit.coreSpillMask);
     method->SetFpSpillMask(cUnit.fpSpillMask);
     if (compiler.IsVerbose()) {
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index c54a0a8..2f63085 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -684,9 +684,8 @@
 {
     loadWordDisp(cUnit, rSELF,
                  OFFSETOF_MEMBER(Thread, pThrowException), rLR);
-    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get exception object
-    genRegCopy(cUnit, r0, rSELF);
-    callUnwindableHelper(cUnit, rLR); // artThrowException(thread, exception);
+    loadValueDirectFixed(cUnit, rlSrc, r0);  // Get exception object
+    callNoUnwindHelper(cUnit, rLR); // art_throw_exception(exception);
 }
 
 static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
diff --git a/src/compiler_test.cc b/src/compiler_test.cc
index 5c1bbef..5207561 100644
--- a/src/compiler_test.cc
+++ b/src/compiler_test.cc
@@ -227,8 +227,7 @@
   AssertStaticIntMethod(2222, LoadDex("IntMath"), "IntMath", "constClassTest", "(I)I", 1111);
 }
 
-// TODO: Need native nativeFillInStackTrace()
-TEST_F(CompilerTest, DISABLED_CatchTest) {
+TEST_F(CompilerTest, CatchTest) {
   CompileDirectMethod(NULL, "java.lang.Object", "<init>", "()V");
   CompileDirectMethod(NULL, "java.lang.NullPointerException", "<init>", "()V");
   CompileDirectMethod(NULL, "java.lang.RuntimeException", "<init>", "()V");
diff --git a/src/context.cc b/src/context.cc
new file mode 100644
index 0000000..1c001d6
--- /dev/null
+++ b/src/context.cc
@@ -0,0 +1,18 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#include "context.h"
+
+#include "context_arm.h"
+#include "context_x86.h"
+
+namespace art {
+
+Context* Context::Create() {
+#if defined(__arm__)
+  return new arm::ArmContext();
+#else
+  return new x86::X86Context();
+#endif
+}
+
+}  // namespace art
diff --git a/src/context.h b/src/context.h
new file mode 100644
index 0000000..05cd43b
--- /dev/null
+++ b/src/context.h
@@ -0,0 +1,36 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#ifndef ART_SRC_CONTEXT_H_
+#define ART_SRC_CONTEXT_H_
+
+#include <stdint.h>
+
+namespace art {
+
+class Frame;
+
+// Representation of a thread's context on the executing machine
+class Context {
+ public:
+  // Creates a context for the running architecture
+  static Context* Create();
+
+  virtual ~Context() {}
+
+  // Read values from callee saves in the given frame. The frame also holds
+  // the method that holds the layout.
+  virtual void FillCalleeSaves(const Frame& fr) = 0;
+
+  // Set the stack pointer value
+  virtual void SetSP(uintptr_t new_sp) = 0;
+
+  // Set the program counter value
+  virtual void SetPC(uintptr_t new_pc) = 0;
+
+  // Switch execution of the executing context to this context
+  virtual void DoLongJump() = 0;
+};
+
+}  // namespace art
+
+#endif  // ART_SRC_CONTEXT_H_
diff --git a/src/context_arm.cc b/src/context_arm.cc
new file mode 100644
index 0000000..3c2af94
--- /dev/null
+++ b/src/context_arm.cc
@@ -0,0 +1,56 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#include "context_arm.h"
+
+#include "object.h"
+
+namespace art {
+namespace arm {
+
+ArmContext::ArmContext() {
+  for (int i = 0; i < 16; i++) {
+    gprs_[i] = 0xEBAD6070;
+  }
+  memset(fprs_, 0, sizeof(fprs_));
+}
+
+void ArmContext::FillCalleeSaves(const Frame& fr) {
+  Method* method = fr.GetMethod();
+  uint32_t core_spills = method->GetCoreSpillMask();
+  size_t spill_count = __builtin_popcount(core_spills);
+  CHECK_EQ(method->GetFpSpillMask(), 0u);
+  if (spill_count > 0) {
+    // Lowest number spill is furthest away, walk registers and fill into context
+    int j = 1;
+    for (int i = 0; i < 16; i++) {
+      if (((core_spills >> i) & 1) != 0) {
+        gprs_[i] = fr.LoadCalleeSave(spill_count - j);
+        j++;
+      }
+    }
+  }
+}
+
+void ArmContext::DoLongJump() {
+  // TODO: Load all GPRs and FPRs, currently the code restores registers R4 to PC
+  asm volatile ( "mov %%r0, %0\n"
+                 "mov %%r1, %1\n"
+                 "ldm %%r0, {%%r4, %%r5, %%r6, %%r7,%%r8,%%r9,%%r10,%%r11,%%r12,%%r13,%%r14}\n"
+                 "mov %%pc,%%r1\n"
+      : // output
+      : "r"(&gprs_[4]), "r"(gprs_[R15])  // input
+#if 0  // TODO: FPRs..
+        "w0" (fprs_[0] ), "w1" (fprs_[1] ), "w2" (fprs_[2] ), "w3" (fprs_[3]),
+        "w4" (fprs_[4] ), "w5" (fprs_[5] ), "w6" (fprs_[6] ), "w7" (fprs_[7]),
+        "w8" (fprs_[8] ), "w9" (fprs_[9] ), "w10"(fprs_[10]), "w11"(fprs_[11]),
+        "w12"(fprs_[12]), "w13"(fprs_[13]), "w14"(fprs_[14]), "w15"(fprs_[15]),
+        "w16"(fprs_[16]), "w17"(fprs_[17]), "w18"(fprs_[18]), "w19"(fprs_[19]),
+        "w20"(fprs_[20]), "w21"(fprs_[21]), "w22"(fprs_[22]), "w23"(fprs_[23]),
+        "w24"(fprs_[24]), "w25"(fprs_[25]), "w26"(fprs_[26]), "w27"(fprs_[27]),
+        "w28"(fprs_[28]), "w29"(fprs_[29]), "w30"(fprs_[30]), "w31"(fprs_[31])
+#endif
+      :);  // clobber
+}
+
+}  // namespace arm
+}  // namespace art
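
To make the slot arithmetic in FillCalleeSaves concrete, assuming the JNI
compiler's ARM core spill mask 0x0FF0 (R4-R11): spill_count is 8, and as
the code comment says, the lowest-numbered register lands furthest away
in the callee-save area, so walking i upward, R4 (j = 1) reads
LoadCalleeSave(7) while R11 (j = 8) reads LoadCalleeSave(0).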
diff --git a/src/context_arm.h b/src/context_arm.h
new file mode 100644
index 0000000..e5a5118
--- /dev/null
+++ b/src/context_arm.h
@@ -0,0 +1,37 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#ifndef ART_SRC_CONTEXT_ARM_H_
+#define ART_SRC_CONTEXT_ARM_H_
+
+#include "constants_arm.h"
+#include "context.h"
+
+namespace art {
+namespace arm {
+
+class ArmContext : public Context {
+ public:
+  ArmContext();
+  virtual ~ArmContext() {}
+
+  virtual void FillCalleeSaves(const Frame& fr);
+
+  virtual void SetSP(uintptr_t new_sp) {
+    gprs_[SP] = new_sp;
+  }
+
+  virtual void SetPC(uintptr_t new_pc) {
+    gprs_[PC] = new_pc;
+  }
+
+  virtual void DoLongJump();
+
+ private:
+  uintptr_t gprs_[16];
+  float fprs_[32];
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_SRC_CONTEXT_ARM_H_
diff --git a/src/context_x86.cc b/src/context_x86.cc
new file mode 100644
index 0000000..2f328e1
--- /dev/null
+++ b/src/context_x86.cc
@@ -0,0 +1,18 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#include "context_x86.h"
+
+namespace art {
+namespace x86 {
+
+void X86Context::DoLongJump() {
+  // Load ESP and EIP
+  asm volatile ( "movl %0, %%esp\n"  // load the saved stack pointer
+                 "jmp *%1"           // jump to the saved program counter
+      : // output
+      : "m"(esp_), "r"(eip_)  // input
+      :);  // clobber
+}
+
+}  // namespace x86
+}  // namespace art
diff --git a/src/context_x86.h b/src/context_x86.h
new file mode 100644
index 0000000..0e31b25
--- /dev/null
+++ b/src/context_x86.h
@@ -0,0 +1,37 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#ifndef ART_SRC_CONTEXT_X86_H_
+#define ART_SRC_CONTEXT_X86_H_
+
+#include "context.h"
+
+namespace art {
+namespace x86 {
+
+class X86Context : public Context {
+ public:
+  X86Context() : esp_(0), eip_(0) {}
+  virtual ~X86Context() {}
+
+  // No callee saves on X86
+  virtual void FillCalleeSaves(const Frame& fr) {}
+
+  virtual void SetSP(uintptr_t new_sp) {
+    esp_ = new_sp;
+  }
+
+  virtual void SetPC(uintptr_t new_pc) {
+    eip_ = new_pc;
+  }
+
+  virtual void DoLongJump();
+
+ private:
+  // Currently just ESP and EIP are used
+  uintptr_t esp_;
+  uintptr_t eip_;
+};
+}  // namespace x86
+}  // namespace art
+
+#endif  // ART_SRC_CONTEXT_X86_H_
diff --git a/src/dex_file.h b/src/dex_file.h
index 8e89e0a..e4f68e8 100644
--- a/src/dex_file.h
+++ b/src/dex_file.h
@@ -276,8 +276,8 @@
 
     private:
       CatchHandlerItem handler_;
-      const byte *current_data_;  // the current handlder in dex file.
-      int32_t remaining_count_;   // number of handler not read.
+      const byte *current_data_;  // the current handler in dex file.
+      int32_t remaining_count_;   // number of handlers not read.
       bool catch_all_;            // is there a handler that will catch all exceptions in case
                                   // that all typed handler does not match.
   };
diff --git a/src/exception_test.cc b/src/exception_test.cc
index d77f1c0..8816530 100644
--- a/src/exception_test.cc
+++ b/src/exception_test.cc
@@ -72,27 +72,22 @@
     ASSERT_TRUE(class_loader != NULL);
     my_klass_ = class_linker_->FindClass("Ljava/lang/MyClass;", class_loader);
     ASSERT_TRUE(my_klass_ != NULL);
+    ByteArray* fake_code = ByteArray::Alloc(12);
+    ASSERT_TRUE(fake_code != NULL);
+    IntArray* fake_mapping_data = IntArray::Alloc(2);
+    ASSERT_TRUE(fake_mapping_data != NULL);
+    fake_mapping_data->Set(0, 3);  // offset 3
+    fake_mapping_data->Set(1, 3);  // maps to dex offset 3
     method_f_ = my_klass_->FindVirtualMethod("f", "()I");
     ASSERT_TRUE(method_f_ != NULL);
     method_f_->SetFrameSizeInBytes(kStackAlignment);
-    method_f_->SetReturnPcOffsetInBytes(4);
+    method_f_->SetReturnPcOffsetInBytes(kStackAlignment - kPointerSize);
+    method_f_->SetCode(fake_code, kThumb2, fake_mapping_data);
     method_g_ = my_klass_->FindVirtualMethod("g", "(I)V");
     ASSERT_TRUE(method_g_ != NULL);
     method_g_->SetFrameSizeInBytes(kStackAlignment);
-    method_g_->SetReturnPcOffsetInBytes(4);
-  }
-
-  DexFile::CatchHandlerItem FindCatchHandlerItem(Method* method,
-                                                 const char exception_type[],
-                                                 uint32_t addr) {
-    const DexFile::CodeItem* code_item = dex_->GetCodeItem(method->GetCodeItemOffset());
-    for (DexFile::CatchHandlerIterator iter = dex_->dexFindCatchHandler(*code_item, addr);
-         !iter.HasNext(); iter.Next()) {
-      if (strcmp(exception_type, dex_->dexStringByTypeIdx(iter.Get().type_idx_)) == 0) {
-        return iter.Get();
-      }
-    }
-    return DexFile::CatchHandlerItem();
+    method_g_->SetReturnPcOffsetInBytes(kStackAlignment - kPointerSize);
+    method_g_->SetCode(fake_code, kThumb2, fake_mapping_data);
   }
 
   UniquePtr<const DexFile> dex_;
@@ -146,21 +141,22 @@
 
   // Create/push fake 16byte stack frame for method g
   fake_stack[top_of_stack++] = reinterpret_cast<uintptr_t>(method_g_);
-  fake_stack[top_of_stack++] = 3;
   fake_stack[top_of_stack++] = 0;
   fake_stack[top_of_stack++] = 0;
+  fake_stack[top_of_stack++] = reinterpret_cast<uintptr_t>(method_f_->GetCode()) + 3;  // return pc
 
   // Create/push fake 16byte stack frame for method f
   fake_stack[top_of_stack++] = reinterpret_cast<uintptr_t>(method_f_);
-  fake_stack[top_of_stack++] = 3;
   fake_stack[top_of_stack++] = 0;
   fake_stack[top_of_stack++] = 0;
+  fake_stack[top_of_stack++] = 0xEBAD6070;  // return pc
 
   // Pull Method* of NULL to terminate the trace
   fake_stack[top_of_stack++] = NULL;
 
+  // Set up thread to appear as if we called out of method_g_ at pc 3
   Thread* thread = Thread::Current();
-  thread->SetTopOfStack(fake_stack);
+  thread->SetTopOfStack(fake_stack, reinterpret_cast<uintptr_t>(method_g_->GetCode()) + 3);
 
   jobject internal = thread->CreateInternalStackTrace();
   jobjectArray ste_array =
diff --git a/src/jni_compiler.cc b/src/jni_compiler.cc
index eb84f14..2088f7d 100644
--- a/src/jni_compiler.cc
+++ b/src/jni_compiler.cc
@@ -76,19 +76,12 @@
   // Cache of IsStatic as we call it often enough
   const bool is_static = native_method->IsStatic();
 
-  // TODO: Need to make sure that the stub is copied into the image. I.e.,
-  // ByteArray* needs to be reachable either as a root or from the object graph.
-
-  // 1. Build the frame
+  // 1. Build the frame saving all callee saves
   const size_t frame_size(jni_conv->FrameSize());
-  const std::vector<ManagedRegister>& spill_regs = jni_conv->RegsToSpillPreCall();
-  __ BuildFrame(frame_size, mr_conv->MethodRegister(), spill_regs);
+  const std::vector<ManagedRegister>& callee_save_regs = jni_conv->CalleeSaveRegisters();
+  __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs);
 
-  // 2. Save callee save registers that aren't callee save in the native code
-  // TODO: implement computing the difference of the callee saves
-  // and saving
-
-  // 3. Set up the StackIndirectReferenceTable
+  // 2. Set up the StackIndirectReferenceTable
   mr_conv->ResetIterator(FrameOffset(frame_size));
   jni_conv->ResetIterator(FrameOffset(0));
   __ StoreImmediateToFrame(jni_conv->SirtNumRefsOffset(),
@@ -101,9 +94,9 @@
                               jni_conv->SirtOffset(),
                               mr_conv->InterproceduralScratchRegister());
 
-  // 4. Place incoming reference arguments into SIRT
+  // 3. Place incoming reference arguments into SIRT
   jni_conv->Next();  // Skip JNIEnv*
-  // 4.5. Create Class argument for static methods out of passed method
+  // 3.5. Create Class argument for static methods out of passed method
   if (is_static) {
     FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
     // Check sirt offset is within frame
@@ -144,24 +137,44 @@
     jni_conv->Next();
   }
 
-  // 5. Transition from being in managed to native code
+  // 4. Transition from being in managed to native code. Save the top_of_managed_stack_
+  // so that the managed stack can be crawled while in native code. Clear the corresponding
+  // PC value, which has no meaning for this frame.
   // TODO: ensure the transition to native follow a store fence.
   __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset());
+  __ StoreImmediateToThread(Thread::TopOfManagedStackPcOffset(), 0,
+                            mr_conv->InterproceduralScratchRegister());
   __ StoreImmediateToThread(Thread::StateOffset(), Thread::kNative,
-                                  mr_conv->InterproceduralScratchRegister());
+                            mr_conv->InterproceduralScratchRegister());
 
-  // 6. Move frame down to allow space for out going args. Do for as short a
+  // 5. Move frame down to allow space for out going args. Do for as short a
   //    time as possible to aid profiling..
   const size_t out_arg_size = jni_conv->OutArgSize();
   __ IncreaseFrameSize(out_arg_size);
 
-  // 7. Acquire lock for synchronized methods.
+  // 6. Acquire lock for synchronized methods.
   if (native_method->IsSynchronized()) {
-    // TODO: preserve incoming arguments in registers
-    mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
+    // Compute arguments in registers to preserve
+    mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
+    std::vector<ManagedRegister> live_argument_regs;
+    while (mr_conv->HasNext()) {
+      if (mr_conv->IsCurrentParamInRegister()) {
+        live_argument_regs.push_back(mr_conv->CurrentParamRegister());
+      }
+      mr_conv->Next();
+    }
+
+    // Copy arguments to preserve to callee save registers
+    CHECK_LE(live_argument_regs.size(), callee_save_regs.size());
+    for (size_t i = 0; i < live_argument_regs.size(); i++) {
+      __ Move(callee_save_regs.at(i), live_argument_regs.at(i));
+    }
+
+    // Get SIRT entry for 1st argument (jclass or this) to be 1st argument to
+    // monitor enter
+    mr_conv->ResetIterator(FrameOffset(frame_size + out_arg_size));
     jni_conv->ResetIterator(FrameOffset(out_arg_size));
     jni_conv->Next();  // Skip JNIEnv*
-    // Get SIRT entry for 1st argument
     if (is_static) {
       FrameOffset sirt_offset = jni_conv->CurrentParamSirtEntryOffset();
       if (jni_conv->IsCurrentParamOnStack()) {
@@ -178,21 +191,29 @@
       CopyParameter(jni_asm.get(), mr_conv.get(), jni_conv.get(), frame_size,
                     out_arg_size);
     }
+
     // Generate JNIEnv* in place and leave a copy in jni_fns_register
     jni_conv->ResetIterator(FrameOffset(out_arg_size));
     ManagedRegister jni_fns_register =
         jni_conv->InterproceduralScratchRegister();
     __ LoadRawPtrFromThread(jni_fns_register, Thread::JniEnvOffset());
     SetNativeParameter(jni_asm.get(), jni_conv.get(), jni_fns_register);
+
     // Call JNIEnv->MonitorEnter(object)
     __ LoadRawPtr(jni_fns_register, jni_fns_register, functions);
     __ Call(jni_fns_register, monitor_enter,
                   jni_conv->InterproceduralScratchRegister());
-    __ FillFromSpillArea(spill_regs, out_arg_size);
+
+    // Check for exceptions
     __ ExceptionPoll(jni_conv->InterproceduralScratchRegister());
+
+    // Restore live arguments
+    for (size_t i = 0; i < live_argument_regs.size(); i++) {
+      __ Move(live_argument_regs.at(i), callee_save_regs.at(i));
+    }
   }
 
-  // 8. Iterate over arguments placing values from managed calling convention in
+  // 7. Iterate over arguments placing values from managed calling convention in
   //    to the convention required for a native call (shuffling). For references
   //    place an index/pointer to the reference after checking whether it is
   //    NULL (which must be encoded as NULL).
@@ -240,7 +261,7 @@
                          ManagedRegister::NoRegister(), false);
     }
   }
-  // 9. Create 1st argument, the JNI environment ptr
+  // 8. Create 1st argument, the JNI environment ptr
   jni_conv->ResetIterator(FrameOffset(out_arg_size));
   if (jni_conv->IsCurrentParamInRegister()) {
     __ LoadRawPtrFromThread(jni_conv->CurrentParamRegister(),
@@ -251,7 +272,7 @@
                             jni_conv->InterproceduralScratchRegister());
   }
 
-  // 10. Plant call to native code associated with method
+  // 9. Plant call to native code associated with method
   if (!jni_conv->IsOutArgRegister(mr_conv->MethodRegister())) {
     // Method register shouldn't have been crushed by setting up outgoing
     // arguments
@@ -261,7 +282,8 @@
     __ Call(jni_conv->MethodStackOffset(), Method::NativeMethodOffset(),
             mr_conv->InterproceduralScratchRegister());
   }
-  // 11. Release lock for synchronized methods.
+
+  // 10. Release lock for synchronized methods.
   if (native_method->IsSynchronized()) {
     mr_conv->ResetIterator(FrameOffset(frame_size+out_arg_size));
     jni_conv->ResetIterator(FrameOffset(out_arg_size));
@@ -308,12 +330,12 @@
     }
   }
 
-  // 12. Release outgoing argument area
+  // 11. Release outgoing argument area
   __ DecreaseFrameSize(out_arg_size);
   mr_conv->ResetIterator(FrameOffset(frame_size));
   jni_conv->ResetIterator(FrameOffset(0));
 
-  // 13. Transition from being in native to managed code, possibly entering a
+  // 12. Transition from being in native to managed code, possibly entering a
   //     safepoint
   CHECK(!jni_conv->InterproceduralScratchRegister()
         .Equals(jni_conv->ReturnRegister()));  // don't clobber result
@@ -329,7 +351,7 @@
                             jni_conv->InterproceduralScratchRegister());
 
 
-  // 14. Place result in correct register possibly loading from indirect
+  // 13. Place result in correct register possibly loading from indirect
   //     reference table
   if (jni_conv->IsReturnAReference()) {
     __ IncreaseFrameSize(out_arg_size);
@@ -350,8 +372,7 @@
     } else {
       __ GetCurrentThread(jni_conv->CurrentParamStackOffset(),
                           jni_conv->InterproceduralScratchRegister());
-      __ Call(jni_conv->CurrentParamStackOffset(),
-              Offset(OFFSETOF_MEMBER(Thread, pDecodeJObjectInThread)),
+      __ Call(ThreadOffset(OFFSETOF_MEMBER(Thread, pDecodeJObjectInThread)),
               jni_conv->InterproceduralScratchRegister());
     }
 
@@ -360,14 +381,18 @@
   }
   __ Move(mr_conv->ReturnRegister(), jni_conv->ReturnRegister());
 
-  // 15. Remove SIRT from thread
+  // 14. Remove SIRT from thread
   __ CopyRawPtrToThread(Thread::TopSirtOffset(), jni_conv->SirtLinkOffset(),
                         jni_conv->InterproceduralScratchRegister());
 
-  // 16. Remove activation
-  __ RemoveFrame(frame_size, spill_regs);
+  // 15. Remove activation
+  if (native_method->IsSynchronized()) {
+    __ RemoveFrame(frame_size, callee_save_regs);
+  } else {
+    __ RemoveFrame(frame_size, std::vector<ManagedRegister>());
+  }
 
-  // 17. Finalize code generation
+  // 16. Finalize code generation
   __ EmitSlowPaths();
   size_t cs = __ CodeSize();
   ByteArray* managed_code = ByteArray::Alloc(cs);
@@ -377,6 +402,8 @@
   native_method->SetCode(managed_code, instruction_set_);
   native_method->SetFrameSizeInBytes(frame_size);
   native_method->SetReturnPcOffsetInBytes(jni_conv->ReturnPcOffset());
+  native_method->SetCoreSpillMask(jni_conv->CoreSpillMask());
+  native_method->SetFpSpillMask(jni_conv->FpSpillMask());
 #undef __
 }
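
A note on the synchronized-method change above: preserving live argument
registers by moving them into callee saves (rather than the old
spill-to-stack scheme) relies on CHECK_LE(live_argument_regs.size(),
callee_save_regs.size()). That holds on ARM, where at most R1-R3 carry
arguments while R4-R11 are saved, and trivially on x86, where all
arguments are passed on the stack.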
 
diff --git a/src/jni_compiler.h b/src/jni_compiler.h
index 8b5f31a..d78404b 100644
--- a/src/jni_compiler.h
+++ b/src/jni_compiler.h
@@ -40,6 +40,7 @@
                           JniCallingConvention* jni_conv,
                           ManagedRegister in_reg);
 
+  // Architecture to generate code for
   InstructionSet instruction_set_;
 
   DISALLOW_COPY_AND_ASSIGN(JniCompiler);
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index 20c403c..9328e85 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -53,9 +53,11 @@
     }
     ASSERT_TRUE(jmethod_ != NULL);
 
-    if (native_fnptr) {
+    if (native_fnptr != NULL) {
       JNINativeMethod methods[] = {{method_name, method_sig, native_fnptr}};
       ASSERT_EQ(JNI_OK, env_->RegisterNatives(jklass_, methods, 1));
+    } else {
+      env_->UnregisterNatives(jklass_);
     }
 
     jmethodID constructor = env_->GetMethodID(jklass_, "<init>", "()V");
@@ -233,10 +235,6 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunIntObjectObjectMethod) {
-#if !defined(__arm__)
-  UNIMPLEMENTED(WARNING) << "needs X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister)";
-  return;
-#endif
   SetupForTest(false, "fooIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClass_fooIOO));
@@ -336,10 +334,6 @@
 
 
 TEST_F(JniCompilerTest, CompileAndRunStaticIntObjectObjectMethod) {
-#if !defined(__arm__)
-  UNIMPLEMENTED(WARNING) << "needs X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister)";
-  return;
-#endif
   SetupForTest(true, "fooSIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClass_fooSIOO));
@@ -390,10 +384,6 @@
 }
 
 TEST_F(JniCompilerTest, CompileAndRunStaticSynchronizedIntObjectObjectMethod) {
-#if !defined(__arm__)
-  UNIMPLEMENTED(WARNING) << "needs X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister)";
-  return;
-#endif
   SetupForTest(true, "fooSSIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClass_fooSSIOO));
@@ -546,10 +536,6 @@
 }
 
 TEST_F(JniCompilerTest, ReturnGlobalRef) {
-#if !defined(__arm__)
-  UNIMPLEMENTED(WARNING) << "needs X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister)";
-  return;
-#endif
   SetupForTest(false, "fooO", "(Ljava/lang/Object;)Ljava/lang/Object;",
                reinterpret_cast<void*>(&Java_MyClass_fooO));
   jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, jobj_);
diff --git a/src/object.cc b/src/object.cc
index ea8a4bf..b5e66d9 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -460,11 +460,82 @@
           this->GetSignature()->Equals(that->GetSignature()));
 }
 
+uint32_t Method::ToDexPC(const uintptr_t pc) const {
+  IntArray* mapping_table = GetMappingTable();
+  if (mapping_table == NULL) {
+    DCHECK(pc == 0);
+    return DexFile::kDexNoIndex;   // Special no mapping/pc == -1 case
+  }
+  size_t mapping_table_length = mapping_table->GetLength();
+  uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(GetCode());
+  CHECK_LT(sought_offset, static_cast<uint32_t>(GetCodeArray()->GetLength()));
+  uint32_t best_offset = 0;
+  uint32_t best_dex_offset = 0;
+  for (size_t i = 0; i < mapping_table_length; i += 2) {
+    uint32_t map_offset = mapping_table->Get(i);
+    uint32_t map_dex_offset = mapping_table->Get(i + 1);
+    if (map_offset == sought_offset) {
+      best_offset = map_offset;
+      best_dex_offset = map_dex_offset;
+      break;
+    }
+    if (map_offset < sought_offset && map_offset > best_offset) {
+      best_offset = map_offset;
+      best_dex_offset = map_dex_offset;
+    }
+  }
+  return best_dex_offset;
+}
+
+uintptr_t Method::ToNativePC(const uint32_t dex_pc) const {
+  IntArray* mapping_table = GetMappingTable();
+  if (mapping_table == NULL) {
+    DCHECK(dex_pc == 0);
+    return 0;   // Special no mapping/pc == 0 case
+  }
+  size_t mapping_table_length = mapping_table->GetLength();
+  for (size_t i = 0; i < mapping_table_length; i += 2) {
+    uint32_t map_offset = mapping_table->Get(i);
+    uint32_t map_dex_offset = mapping_table->Get(i + 1);
+    if (map_dex_offset == dex_pc) {
+      DCHECK_LT(map_offset, static_cast<uint32_t>(GetCodeArray()->GetLength()));
+      return reinterpret_cast<uintptr_t>(GetCode()) + map_offset;
+    }
+  }
+  LOG(FATAL) << "Looking up Dex PC not contained in method";
+  return 0;
+}
+
+uint32_t Method::FindCatchBlock(Class* exception_type, uint32_t dex_pc) const {
+  DexCache* dex_cache = GetDeclaringClass()->GetDexCache();
+  const ClassLoader* class_loader = GetDeclaringClass()->GetClassLoader();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  const DexFile& dex_file = class_linker->FindDexFile(dex_cache);
+  const DexFile::CodeItem* code_item = dex_file.GetCodeItem(GetCodeItemOffset());
+  // Iterate over the catch handlers associated with dex_pc
+  for (DexFile::CatchHandlerIterator iter = dex_file.dexFindCatchHandler(*code_item, dex_pc);
+       !iter.HasNext(); iter.Next()) {
+    uint32_t iter_type_idx = iter.Get().type_idx_;
+    // Catch all case
+    if (iter_type_idx == DexFile::kDexNoIndex) {
+      return iter.Get().address_;
+    }
+    // Does this catch exception type apply?
+    Class* iter_exception_type =
+        class_linker->ResolveType(dex_file, iter_type_idx, dex_cache, class_loader);
+    if (iter_exception_type->IsAssignableFrom(exception_type)) {
+      return iter.Get().address_;
+    }
+  }
+  // Handler not found
+  return DexFile::kDexNoIndex;
+}
+
 void Method::SetCode(ByteArray* code_array, InstructionSet instruction_set,
-                     ByteArray* mapping_table) {
+                     IntArray* mapping_table) {
   CHECK(GetCode() == NULL || IsNative());
   SetFieldPtr<ByteArray*>(OFFSET_OF_OBJECT_MEMBER(Method, code_array_), code_array, false);
-  SetFieldPtr<ByteArray*>(OFFSET_OF_OBJECT_MEMBER(Method, mapping_table_),
+  SetFieldPtr<IntArray*>(OFFSET_OF_OBJECT_MEMBER(Method, mapping_table_),
        mapping_table, false);
   int8_t* code = code_array->GetData();
   uintptr_t address = reinterpret_cast<uintptr_t>(code);
@@ -475,6 +546,19 @@
   SetFieldPtr<const void*>(OFFSET_OF_OBJECT_MEMBER(Method, code_), reinterpret_cast<const void*>(address), false);
 }
 
+bool Method::IsWithinCode(uintptr_t pc) const {
+  if (GetCode() == NULL) {
+    return false;
+  }
+  if (pc == 0) {
+    // assume that this is some initial value that will always lie in code
+    return true;
+  } else {
+    uint32_t rel_offset = pc - reinterpret_cast<uintptr_t>(GetCodeArray()->GetData());
+    return rel_offset < static_cast<uint32_t>(GetCodeArray()->GetLength());
+  }
+}
+
 void Method::SetInvokeStub(const ByteArray* invoke_stub_array) {
   const InvokeStub* invoke_stub = reinterpret_cast<InvokeStub*>(invoke_stub_array->GetData());
   SetFieldPtr<const ByteArray*>(
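
To illustrate the mapping-table format consumed by ToDexPC and ToNativePC
above (a flat IntArray of native-offset/dex-pc pairs; the values below
are made up):

    int32_t table[] = { 0,  0,    // code + 0  -> dex pc 0
                        4,  1,    // code + 4  -> dex pc 1
                        12, 3 };  // code + 12 -> dex pc 3
    // ToDexPC(code + 8)  == 1          (closest mapped offset at or below 8)
    // ToNativePC(3)      == code + 12  (requires an exact dex-pc match)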
diff --git a/src/object.h b/src/object.h
index 3e154e4..7292582 100644
--- a/src/object.h
+++ b/src/object.h
@@ -866,12 +866,20 @@
   }
 
   void SetCode(ByteArray* code_array, InstructionSet instruction_set,
-               ByteArray* mapping_table = NULL);
+               IntArray* mapping_table = NULL);
 
   static MemberOffset GetCodeOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Method, code_);
   }
 
+  // Is the given PC within the code array?
+  bool IsWithinCode(uintptr_t pc) const;
+
+  IntArray* GetMappingTable() const {
+    return GetFieldObject<IntArray*>(
+        OFFSET_OF_OBJECT_MEMBER(Method, mapping_table_), false);
+  }
+
   size_t GetFrameSizeInBytes() const {
     DCHECK(sizeof(size_t) == sizeof(uint32_t));
     size_t result = GetField32(
@@ -943,10 +951,8 @@
 
   void SetInvokeStub(const ByteArray* invoke_stub_array);
 
-  void SetFpSpillMask(uint32_t fp_spill_mask) {
-    // Computed during compilation
-    SetField32(OFFSET_OF_OBJECT_MEMBER(Method, fp_spill_mask_),
-               fp_spill_mask, false);
+  uint32_t GetCoreSpillMask() {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(Method, core_spill_mask_), false);
   }
 
   void SetCoreSpillMask(uint32_t core_spill_mask) {
@@ -955,17 +961,26 @@
                core_spill_mask, false);
   }
 
+  uint32_t GetFpSpillMask() {
+    return GetField32(OFFSET_OF_OBJECT_MEMBER(Method, fp_spill_mask_), false);
+  }
+
+  void SetFpSpillMask(uint32_t fp_spill_mask) {
+    // Computed during compilation
+    SetField32(OFFSET_OF_OBJECT_MEMBER(Method, fp_spill_mask_),
+               fp_spill_mask, false);
+  }
+
-  // Converts a native PC to a dex PC.  TODO: this is a no-op
-  // until we associate a PC mapping table with each method.
-  uintptr_t ToDexPC(const uintptr_t pc) const {
-    return pc;
-  }
+  // Converts a native PC to a dex PC using the mapping table.
+  uint32_t ToDexPC(const uintptr_t pc) const;
 
-  // Converts a dex PC to a native PC.  TODO: this is a no-op
-  // until we associate a PC mapping table with each method.
-  uintptr_t ToNativePC(const uintptr_t pc) const {
-    return pc;
-  }
+  // Converts a dex PC to a native PC using the mapping table.
+  uintptr_t ToNativePC(const uint32_t dex_pc) const;
+
+  // Find the catch block for the given exception type and dex_pc
+  uint32_t FindCatchBlock(Class* exception_type, uint32_t dex_pc) const;
 
   static Class* GetJavaLangReflectMethod() {
     DCHECK(java_lang_reflect_Method_ != NULL);
@@ -1018,7 +1033,7 @@
   const ByteArray* invoke_stub_array_;
 
   // Storage for mapping_table_
-  const ByteArray* mapping_table_;
+  IntArray* mapping_table_;
 
   // Byte arrays that hold data for the register maps
   const ByteArray* register_map_data_;
diff --git a/src/runtime_support.S b/src/runtime_support.S
index fe40cf2..458102d 100644
--- a/src/runtime_support.S
+++ b/src/runtime_support.S
@@ -2,6 +2,20 @@
 
     .balign 4
 
+    .global art_throw_exception
+    .extern artThrowExceptionHelper
+    /*
+     * Called by managed code, saves all registers (forms basis of long jump context).
+     * artThrowExceptionHelper will place a mock Method* at the bottom of the thread's stack.
+     * r0 holds Throwable
+     */
+art_throw_exception:
+    stmdb  sp!, {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
+    sub sp, #16                @ 4 words of space, bottom word will hold Method*
+    mov r1, r9
+    mov r2, sp
+    b artThrowExceptionHelper  @ artThrowExceptionHelper(Throwable*, Thread*, SP)
+
     .global art_invoke_interface_trampoline
     .extern artFindInterfaceMethodInCache
     .extern artFailedInvokeInterface
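
The stmdb above pushes R1-R11 and LR (twelve words, with ascending register
numbers at ascending addresses) and the sub reserves four more words, which is
exactly the frame that Thread::CalleeSaveMethod() in thread.cc describes. A
small consistency check of that arithmetic:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
      const size_t kPointerSize = 4;  // ARM32
      const size_t pushed = 12;       // R1-R11 and LR, from the stmdb above
      const size_t reserved = 4;      // sub sp, #16; bottom word holds the Method*
      assert((pushed + reserved) * kPointerSize == 64);  // SetFrameSizeInBytes(64)

      // stmdb stores the highest-numbered register (LR) in the top word:
      assert(64 - kPointerSize == 60);  // SetReturnPcOffsetInBytes(60)

      // One bit per spilled core register: R1-R11 plus LR (bit 14).
      const uint32_t mask = 0xFFEu | (1u << 14);
      assert(mask == 0x4FFEu);  // SetCoreSpillMask(0x4FFE)
      return 0;
    }
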
diff --git a/src/runtime_support.h b/src/runtime_support.h
index 04c464f..671d7dc 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -9,6 +9,7 @@
   extern "C" uint64_t art_shr_long(uint64_t, uint32_t);
   extern "C" uint64_t art_ushr_long(uint64_t, uint32_t);
   extern "C" void art_invoke_interface_trampoline(void*, void*, void*, void*);
+  extern "C" void art_throw_exception(void*);
 
   /* Conversions */
   extern "C" float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
diff --git a/src/stub_arm.cc b/src/stub_arm.cc
index 0711085..986d2b0 100644
--- a/src/stub_arm.cc
+++ b/src/stub_arm.cc
@@ -11,7 +11,7 @@
 
 typedef void (*ThrowAme)(Method*, Thread*);
 
-ByteArray* CreateAbstractMethodErrorStub(ThrowAme throw_ame) {
+ByteArray* CreateAbstractMethodErrorStub() {
   UniquePtr<ArmAssembler> assembler( static_cast<ArmAssembler*>(Assembler::Create(kArm)) );
 
   // R0 is the Method* already.
@@ -19,8 +19,8 @@
   // Pass Thread::Current() in R1
   __ mov(R1, ShifterOperand(R9));
 
-  // Call throw_ame to throw AbstractMethodError
-  __ LoadImmediate(R12, reinterpret_cast<int32_t>(throw_ame));
+  // Call to throw AbstractMethodError
+  __ LoadFromOffset(kLoadWord, R12, TR, OFFSETOF_MEMBER(Thread, pThrowAbstractMethodErrorFromCode));
   // Leaf call to routine that never returns
   __ mov(PC, ShifterOperand(R12));
 
diff --git a/src/stub_x86.cc b/src/stub_x86.cc
index fd8d0f2..c23c751 100644
--- a/src/stub_x86.cc
+++ b/src/stub_x86.cc
@@ -11,7 +11,7 @@
 
 typedef void (*ThrowAme)(Method*, Thread*);
 
-ByteArray* CreateAbstractMethodErrorStub(ThrowAme throw_ame) {
+ByteArray* CreateAbstractMethodErrorStub() {
   UniquePtr<X86Assembler> assembler( static_cast<X86Assembler*>(Assembler::Create(kX86)) );
 
   // Pad stack to ensure 16-byte alignment
@@ -20,10 +20,9 @@
   __ fs()->pushl(Address::Absolute(Thread::SelfOffset()));  // Thread*
   __ pushl(EDI); // Method*
 
-  // Call throw_ame to throw AbstractMethodError
-  // TODO: make this PIC (throw_ame will not be in the same location after image load)
-  // TODO: remove X86Assembler::Call(uintptr_t addr, ManagedRegister scratch)
-  __ Call(reinterpret_cast<int32_t>(throw_ame), X86ManagedRegister::FromCpuRegister(ECX));
+  // Call to throw AbstractMethodError
+  __ Call(ThreadOffset(OFFSETOF_MEMBER(Thread, pThrowAbstractMethodErrorFromCode)),
+          X86ManagedRegister::FromCpuRegister(ECX));
 
   // Because the call above never returns, we do not need to do ESP+=16 here.
 
@@ -50,9 +49,8 @@
   __ fs()->movl(ECX, Address::Absolute(Thread::SelfOffset()));
   __ pushl(ECX);  // Thread*
 
-  // TODO: make this PIC (FindNativeMethod will not be in the same location after image load)
-  // TODO: remove X86Assembler::Call(uintptr_t addr, ManagedRegister scratch)
-  __ Call(reinterpret_cast<int32_t>(&FindNativeMethod), X86ManagedRegister::FromCpuRegister(ECX));
+  __ Call(ThreadOffset(OFFSETOF_MEMBER(Thread, pFindNativeMethod)),
+          X86ManagedRegister::FromCpuRegister(ECX));
 
   __ addl(ESP, Immediate(16));
 
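
Both the ARM and x86 stubs now reach their targets through function pointers
held in the Thread rather than through addresses baked into the instruction
stream, which is what resolves the PIC TODOs: the stub code is identical
wherever the boot image is mapped, and only the per-thread entry point table
carries real addresses. The same dispatch sketched in C++, with an illustrative
(not actual) Thread layout:

    namespace sketch {

    struct Method;
    struct Thread;  // forward declaration for the slot's signature

    // Illustrative per-thread entry point table; the real Thread carries many
    // such slots (pThrowAbstractMethodErrorFromCode, pFindNativeMethod, ...).
    struct Thread {
      void (*pThrowAbstractMethodErrorFromCode)(Method*, Thread*);
    };

    // What both stubs effectively do: load the slot out of the current Thread
    // and tail-call it. No absolute code address is embedded in the caller,
    // so the stub works unchanged wherever it ends up in memory.
    void AbstractMethodErrorStub(Thread* self, Method* method) {
      self->pThrowAbstractMethodErrorFromCode(method, self);
    }

    }  // namespace sketch
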
diff --git a/src/thread.cc b/src/thread.cc
index d879cea..513c019 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -27,6 +27,7 @@
 #include <list>
 
 #include "class_linker.h"
+#include "context.h"
 #include "heap.h"
 #include "jni_internal.h"
 #include "object.h"
@@ -48,18 +49,26 @@
     LOG(INFO) << "Info: " << info;
 }
 
-// TODO: placeholder.  This is what generated code will call to throw
-void ThrowException(Thread* thread, Throwable* exception) {
+}  // namespace art
+
+// Called by generated code to throw an exception.
+extern "C" void artThrowExceptionHelper(art::Throwable* exception,
+                                        art::Thread* thread,
+                                        art::Method** sp) {
   /*
    * exception may be NULL, in which case this routine should
-   * throw NPE.  NOTE: this is a convenience for generated code,
-   * which previously did the null check inline and constructed
-   * and threw a NPE if NULL.  This routine responsible for setting
-   * exception_ in thread.
+   * throw an NPE.  NOTE: this is a convenience for generated code,
+   * which previously did the null check inline and constructed
+   * and threw an NPE if NULL.  This routine is responsible for setting
+   * exception_ in thread and delivering the exception.
    */
-  UNIMPLEMENTED(FATAL) << "Unimplemented exception throw: " << PrettyType(exception);
+  *sp = thread->CalleeSaveMethod();
+  thread->SetTopOfStack(sp, 0);
+  thread->DeliverException(exception);
 }
 
+namespace art {
+
 // TODO: placeholder.  Helper function to type
 Class* InitializeTypeFromCode(uint32_t type_idx, Method* method) {
   /*
@@ -167,6 +176,13 @@
     UNIMPLEMENTED(FATAL) << "No such method, idx: " << method_idx;
 }
 
+void ThrowAbstractMethodErrorFromCode(Method* method, Thread* thread) {
+  thread->ThrowNewException("Ljava/lang/AbstractMethodError;",
+                            "abstract method \"%s\"",
+                            PrettyMethod(method).c_str());
+  thread->DeliverException(thread->GetException());
+}
+
 /*
  * Temporary placeholder.  Should include run-time checks for size
  * of fill data <= size of array.  If not, throw arrayOutOfBoundsException.
@@ -289,6 +306,7 @@
   pLdivmod = __aeabi_ldivmod;
   pLmul = __aeabi_lmul;
   pInvokeInterfaceTrampoline = art_invoke_interface_trampoline;
+  pThrowException = art_throw_exception;
 #endif
   pF2l = F2L;
   pD2l = D2L;
@@ -304,7 +322,6 @@
   pGetObjStatic = Field::GetObjStaticFromCode;
   pSetObjStatic = Field::SetObjStaticFromCode;
   pCanPutArrayElementFromCode = Class::CanPutArrayElementFromCode;
-  pThrowException = ThrowException;
   pInitializeTypeFromCode = InitializeTypeFromCode;
   pResolveMethodFromCode = ResolveMethodFromCode;
   pInitializeStaticStorage = ClassLinker::InitializeStaticStorageFromCode;
@@ -323,6 +340,7 @@
   pThrowRuntimeExceptionFromCode = ThrowRuntimeExceptionFromCode;
   pThrowInternalErrorFromCode = ThrowInternalErrorFromCode;
   pThrowNoSuchMethodFromCode = ThrowNoSuchMethodFromCode;
+  pThrowAbstractMethodErrorFromCode = ThrowAbstractMethodErrorFromCode;
   pFindNativeMethod = FindNativeMethod;
   pDecodeJObjectInThread = DecodeJObjectInThread;
   pDebugMe = DebugMe;
@@ -334,12 +352,22 @@
   sp_ = reinterpret_cast<Method**>(next_sp);
 }
 
-uintptr_t Frame::GetPC() const {
+uintptr_t Frame::GetReturnPC() const {
   byte* pc_addr = reinterpret_cast<byte*>(sp_) +
       GetMethod()->GetReturnPcOffsetInBytes();
   return *reinterpret_cast<uintptr_t*>(pc_addr);
 }
 
+uintptr_t Frame::LoadCalleeSave(int num) const {
+  // Callee saves are held at the top of the frame
+  Method* method = GetMethod();
+  DCHECK(method != NULL);
+  size_t frame_size = method->GetFrameSizeInBytes();
+  byte* save_addr = reinterpret_cast<byte*>(sp_) + frame_size -
+                    ((num + 1) * kPointerSize);
+  return *reinterpret_cast<uintptr_t*>(save_addr);
+}
+
 Method* Frame::NextMethod() const {
   byte* next_sp = reinterpret_cast<byte*>(sp_) +
       GetMethod()->GetFrameSizeInBytes();
@@ -656,10 +684,10 @@
   StackDumpVisitor(std::ostream& os) : os(os) {
   }
 
-  ~StackDumpVisitor() {
+  virtual ~StackDumpVisitor() {
   }
 
-  void VisitFrame(const Frame& frame) {
+  void VisitFrame(const Frame& frame, uintptr_t pc) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
 
     Method* m = frame.GetMethod();
@@ -670,7 +698,7 @@
     if (m->IsNative()) {
       os << "(Native method)";
     } else {
-      int line_number = dex_file.GetLineNumFromPC(m, m->ToDexPC(frame.GetPC()));
+      int line_number = dex_file.GetLineNumFromPC(m, m->ToDexPC(pc));
       os << "(" << c->GetSourceFile()->ToModifiedUtf8() << ":" << line_number << ")";
     }
     os << "\n";
@@ -946,7 +974,7 @@
  public:
   CountStackDepthVisitor() : depth_(0) {}
 
-  virtual void VisitFrame(const Frame&) {
+  virtual void VisitFrame(const Frame&, uintptr_t pc) {
     ++depth_;
   }
 
@@ -979,9 +1007,9 @@
 
   virtual ~BuildInternalStackTraceVisitor() {}
 
-  virtual void VisitFrame(const Frame& frame) {
+  virtual void VisitFrame(const Frame& frame, uintptr_t pc) {
     method_trace_->Set(count_, frame.GetMethod());
-    pc_trace_->Set(count_, frame.GetPC());
+    pc_trace_->Set(count_, pc);
     ++count_;
   }
 
@@ -1003,19 +1031,36 @@
 
 void Thread::WalkStack(StackVisitor* visitor) const {
   Frame frame = GetTopOfStack();
+  uintptr_t pc = top_of_managed_stack_pc_;
   // TODO: enable this CHECK after native_to_managed_record_ is initialized during startup.
   // CHECK(native_to_managed_record_ != NULL);
   NativeToManagedRecord* record = native_to_managed_record_;
 
-  while (frame.GetSP()) {
+  while (frame.GetSP() != 0) {
     for ( ; frame.GetMethod() != 0; frame.Next()) {
-      visitor->VisitFrame(frame);
+      DCHECK(frame.GetMethod()->IsWithinCode(pc));
+      visitor->VisitFrame(frame, pc);
+      pc = frame.GetReturnPC();
     }
     if (record == NULL) {
       break;
     }
-    frame.SetSP(reinterpret_cast<art::Method**>(record->last_top_of_managed_stack));  // last_tos should return Frame instead of sp?
-    record = record->link;
+    // TODO: should last_top_of_managed_stack_ be a Frame rather than an SP?
+    frame.SetSP(reinterpret_cast<art::Method**>(record->last_top_of_managed_stack_));
+    pc = record->last_top_of_managed_stack_pc_;
+    record = record->link_;
+  }
+}
+
+void Thread::WalkStackUntilUpCall(StackVisitor* visitor) const {
+  Frame frame = GetTopOfStack();
+  uintptr_t pc = top_of_managed_stack_pc_;
+
+  if (frame.GetSP() != 0) {
+    for ( ; frame.GetMethod() != 0; frame.Next()) {
+      visitor->VisitFrame(frame, pc);
+      pc = frame.GetReturnPC();
+    }
   }
 }
 
@@ -1104,65 +1149,88 @@
   UNIMPLEMENTED(FATAL);
 }
 
-Frame Thread::FindExceptionHandler(void* throw_pc, void** handler_pc) {
-  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  DCHECK(class_linker != NULL);
+Method* Thread::CalleeSaveMethod() const {
+  // TODO: we should only allocate this once
+  // TODO: this code is ARM specific
+  Method* method = Runtime::Current()->GetClassLinker()->AllocMethod();
+  method->SetCode(NULL, art::kThumb2, NULL);
+  method->SetFrameSizeInBytes(64);       // 12 spilled words plus 4 reserved words
+  method->SetReturnPcOffsetInBytes(60);  // LR is spilled to the top word of the frame
+  method->SetCoreSpillMask(0x4FFE);      // R1-R11 and LR, as spilled by art_throw_exception
+  method->SetFpSpillMask(0);             // no FP callee saves are spilled yet
+  return method;
+}
 
-  Frame cur_frame = GetTopOfStack();
-  for (int unwind_depth = 0; ; unwind_depth++) {
-    const Method* cur_method = cur_frame.GetMethod();
-    DexCache* dex_cache = cur_method->GetDeclaringClass()->GetDexCache();
-    const DexFile& dex_file = class_linker->FindDexFile(dex_cache);
+class CatchBlockStackVisitor : public Thread::StackVisitor {
+ public:
+  CatchBlockStackVisitor(Class* to_find, Context* ljc)
+      : found_(false), to_find_(to_find), long_jump_context_(ljc) {}
 
-    void* handler_addr = FindExceptionHandlerInMethod(cur_method,
-                                                      throw_pc,
-                                                      dex_file,
-                                                      class_linker);
-    if (handler_addr) {
-      *handler_pc = handler_addr;
-      return cur_frame;
-    } else {
-      // Check if we are at the last frame
-      if (cur_frame.HasNext()) {
-        cur_frame.Next();
-      } else {
-        // Either at the top of stack or next frame is native.
-        break;
+  virtual void VisitFrame(const Frame& fr, uintptr_t pc) {
+    if (!found_) {
+      last_pc_ = pc;
+      handler_frame_ = fr;
+      Method* method = fr.GetMethod();
+      if (pc > 0) {
+        // Move the PC back 2 bytes: pc here is a return PC, which points
+        // at the instruction after the call. We want the Dex PC of the
+        // instruction containing the call, not of the instruction that
+        // follows it.
+        pc -= 2;
+      }
+      uint32_t dex_pc = method->ToDexPC(pc);
+      if (dex_pc != DexFile::kDexNoIndex) {
+        uint32_t found_dex_pc = method->FindCatchBlock(to_find_, dex_pc);
+        if (found_dex_pc != DexFile::kDexNoIndex) {
+          found_ = true;
+          handler_dex_pc_ = found_dex_pc;
+        }
+      }
+      if (!found_) {
+        // The caller may be the handler, so fill this frame's callee saves into the context
+        long_jump_context_->FillCalleeSaves(fr);
       }
     }
   }
-  *handler_pc = NULL;
-  return Frame();
+
+  // Did we find a catch block yet?
+  bool found_;
+  // The type of the exception catch block to find
+  Class* to_find_;
+  // Frame with found handler or last frame if no handler found
+  Frame handler_frame_;
+  // Found dex PC of the handler block
+  uint32_t handler_dex_pc_;
+  // Context that will be the target of the long jump
+  Context* long_jump_context_;
+  uintptr_t last_pc_;  // PC in the last visited frame; long jump target if no handler is found
+};
+
+void Thread::DeliverException(Throwable* exception) {
+  SetException(exception);  // Set exception on thread
+
+  Context* long_jump_context = GetLongJumpContext();
+  CatchBlockStackVisitor catch_finder(exception->GetClass(), long_jump_context);
+  WalkStackUntilUpCall(&catch_finder);
+
+  long_jump_context->SetSP(reinterpret_cast<intptr_t>(catch_finder.handler_frame_.GetSP()));
+  uintptr_t long_jump_pc;
+  if (catch_finder.found_) {
+    long_jump_pc = catch_finder.handler_frame_.GetMethod()->ToNativePC(catch_finder.handler_dex_pc_);
+  } else {
+    long_jump_pc = catch_finder.last_pc_;
+  }
+  long_jump_context->SetPC(long_jump_pc);
+  long_jump_context->DoLongJump();
 }
 
-void* Thread::FindExceptionHandlerInMethod(const Method* method,
-                                           void* throw_pc,
-                                           const DexFile& dex_file,
-                                           ClassLinker* class_linker) {
-  Throwable* exception_obj = exception_;
-  exception_ = NULL;
-
-  intptr_t dex_pc = -1;
-  const DexFile::CodeItem* code_item = dex_file.GetCodeItem(method->GetCodeItemOffset());
-  DexFile::CatchHandlerIterator iter;
-  for (iter = dex_file.dexFindCatchHandler(*code_item,
-                                           method->ToDexPC(reinterpret_cast<intptr_t>(throw_pc)));
-       !iter.HasNext();
-       iter.Next()) {
-    Class* klass = class_linker->FindSystemClass(dex_file.dexStringByTypeIdx(iter.Get().type_idx_));
-    DCHECK(klass != NULL);
-    if (exception_obj->InstanceOf(klass)) {
-      dex_pc = iter.Get().address_;
-      break;
-    }
+Context* Thread::GetLongJumpContext() {
+  Context* result = long_jump_context_.get();
+  if (result == NULL) {
+    result = Context::Create();
+    long_jump_context_.reset(result);
   }
-
-  exception_ = exception_obj;
-  if (iter.HasNext()) {
-    return NULL;
-  } else {
-    return reinterpret_cast<void*>( method->ToNativePC(dex_pc) );
-  }
+  return result;
 }
 
 void Thread::VisitRoots(Heap::RootVisitor* visitor, void* arg) const {
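
As CatchBlockStackVisitor passes over each frame that does not contain a
handler, it folds that frame's callee saves into the long jump context, so by
the time DoLongJump() runs the context holds the register values the handler
expects to see. context.cc is not shown in this change; a hedged sketch of what
FillCalleeSaves could look like in terms of the accessors added above, where
Context::SetGPR is a hypothetical setter:

    // Sketch only: assumes callee saves sit at the top of the frame with the
    // highest-numbered spilled register in the top slot, matching stmdb order
    // and Frame::LoadCalleeSave above.
    void FillCalleeSavesSketch(Context* context, const Frame& fr) {
      uint32_t core_spills = fr.GetMethod()->GetCoreSpillMask();
      int spill_slot = 0;
      for (int reg = 31; reg >= 0; --reg) {
        if ((core_spills & (1u << reg)) != 0) {
          // Slot 0 is the top word of the frame, so higher registers come first.
          context->SetGPR(reg, fr.LoadCalleeSave(spill_slot));  // SetGPR is hypothetical
          ++spill_slot;
        }
      }
    }
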
diff --git a/src/thread.h b/src/thread.h
index 66eba85..2e7b615 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -32,6 +32,7 @@
 #include "mutex.h"
 #include "mem_map.h"
 #include "offsets.h"
+#include "UniquePtr.h"
 
 namespace art {
 
@@ -39,6 +40,7 @@
 class Class;
 class ClassLinker;
 class ClassLoader;
+class Context;
 class Method;
 class Monitor;
 class Object;
@@ -94,8 +96,9 @@
 };
 
 struct NativeToManagedRecord {
-  NativeToManagedRecord* link;
-  void* last_top_of_managed_stack;
+  NativeToManagedRecord* link_;
+  void* last_top_of_managed_stack_;
+  uintptr_t last_top_of_managed_stack_pc_;
 };
 
 // Iterator over managed frames up to the first native-to-managed transition
@@ -113,7 +116,9 @@
 
   void Next();
 
-  uintptr_t GetPC() const;
+  uintptr_t GetReturnPC() const;
+
+  uintptr_t LoadCalleeSave(int num) const;
 
   Method** GetSP() const {
     return sp_;
@@ -207,7 +212,7 @@
   Method* (*pFindInterfaceMethodInCache)(Class*, uint32_t, const Method*, struct DvmDex*);
   void (*pUnlockObjectFromCode)(Thread*, Object*);
   void (*pLockObjectFromCode)(Thread*, Object*);
-  void (*pThrowException)(Thread*, Throwable*);
+  void (*pThrowException)(void*);
   void (*pHandleFillArrayDataFromCode)(Array*, const uint16_t*);
   Class* (*pInitializeTypeFromCode)(uint32_t, Method*);
   void (*pResolveMethodFromCode)(Method*, uint32_t);
@@ -224,13 +229,14 @@
   void (*pThrowRuntimeExceptionFromCode)(int32_t);
   void (*pThrowInternalErrorFromCode)(int32_t);
   void (*pThrowNoSuchMethodFromCode)(int32_t);
+  void (*pThrowAbstractMethodErrorFromCode)(Method* method, Thread* thread);
   void* (*pFindNativeMethod)(Thread* thread);
   Object* (*pDecodeJObjectInThread)(Thread* thread, jobject obj);
 
   class StackVisitor {
    public:
     virtual ~StackVisitor() {}
-    virtual void VisitFrame(const Frame& frame) = 0;
+    virtual void VisitFrame(const Frame& frame, uintptr_t pc) = 0;
   };
 
   // Creates a new thread.
@@ -324,16 +330,30 @@
     exception_ = NULL;
   }
 
+  // Find the catch block and perform a long jump to the appropriate exception handler
+  void DeliverException(Throwable* exception);
+
+  Context* GetLongJumpContext();
+
   Frame GetTopOfStack() const {
     return top_of_managed_stack_;
   }
 
-  // TODO: this is here for testing, remove when we have exception unit tests
-  // that use the real stack
-  void SetTopOfStack(void* stack) {
+  // Sets the top of the managed stack and the PC of the call out of that
+  // frame; used when entering the runtime from managed code.
+  void SetTopOfStack(void* stack, uintptr_t pc) {
     top_of_managed_stack_.SetSP(reinterpret_cast<Method**>(stack));
+    top_of_managed_stack_pc_ = pc;
   }
 
+  void SetTopOfStackPC(uintptr_t pc) {
+    top_of_managed_stack_pc_ = pc;
+  }
+
+  // Returns a special method that describes all callee saves being spilled
+  // to the stack.
+  Method* CalleeSaveMethod() const;
+
   void ThrowNewException(const char* exception_class_descriptor, const char* fmt, ...)
       __attribute__ ((format(printf, 3, 4)));
 
@@ -390,14 +410,16 @@
 
   // Linked list recording transitions from native to managed code
   void PushNativeToManagedRecord(NativeToManagedRecord* record) {
-    record->last_top_of_managed_stack = reinterpret_cast<void*>(top_of_managed_stack_.GetSP());
-    record->link = native_to_managed_record_;
+    record->last_top_of_managed_stack_ = reinterpret_cast<void*>(top_of_managed_stack_.GetSP());
+    record->last_top_of_managed_stack_pc_ = top_of_managed_stack_pc_;
+    record->link_ = native_to_managed_record_;
     native_to_managed_record_ = record;
     top_of_managed_stack_.SetSP(NULL);
   }
   void PopNativeToManagedRecord(const NativeToManagedRecord& record) {
-    native_to_managed_record_ = record.link;
-    top_of_managed_stack_.SetSP(reinterpret_cast<Method**>(record.last_top_of_managed_stack));
+    native_to_managed_record_ = record.link_;
+    top_of_managed_stack_.SetSP(reinterpret_cast<Method**>(record.last_top_of_managed_stack_));
+    top_of_managed_stack_pc_ = record.last_top_of_managed_stack_pc_;
   }
 
   const ClassLoader* GetClassLoaderOverride() {
@@ -461,6 +483,10 @@
         OFFSETOF_MEMBER(Frame, sp_));
   }
 
+  static ThreadOffset TopOfManagedStackPcOffset() {
+    return ThreadOffset(OFFSETOF_MEMBER(Thread, top_of_managed_stack_pc_));
+  }
+
   static ThreadOffset TopSirtOffset() {
     return ThreadOffset(OFFSETOF_MEMBER(Thread, top_sirt_));
   }
@@ -495,6 +521,8 @@
 
   void WalkStack(StackVisitor* visitor) const;
 
+  void WalkStackUntilUpCall(StackVisitor* visitor) const;
+
   // Thin lock thread id. This is a small integer used by the thin lock implementation.
   // This is not to be confused with the native thread's tid, nor is it the value returned
   // by java.lang.Thread.getId --- this is a distinct value, used only for locking. One
@@ -530,6 +558,9 @@
   // a managed stack when a thread is in native code.
   Frame top_of_managed_stack_;
 
+  // PC corresponding to the call out of the top_of_managed_stack_ frame
+  uintptr_t top_of_managed_stack_pc_;
+
   // A linked list (of stack allocated records) recording transitions from
   // native to managed code.
   NativeToManagedRecord* native_to_managed_record_;
@@ -560,6 +591,9 @@
   // useful for testing.
   const ClassLoader* class_loader_override_;
 
+  // Thread local, lazily allocated, long jump context. Used to deliver exceptions.
+  UniquePtr<Context> long_jump_context_;
+
   // TLS key used to retrieve the VM thread object.
   static pthread_key_t pthread_key_self_;
 
@@ -571,6 +605,7 @@
 
   DISALLOW_COPY_AND_ASSIGN(Thread);
 };
+
 std::ostream& operator<<(std::ostream& os, const Thread& thread);
 std::ostream& operator<<(std::ostream& os, const Thread::State& state);
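
With the PC threaded through VisitFrame, a visitor sees the PC inside the frame
being visited rather than that frame's return PC, so per-frame line numbers no
longer come out one frame off. A minimal visitor in the style of those in
thread.cc, relying on the declarations in this change:

    #include <cstdint>
    #include <vector>

    // Records the dex PC of every managed frame on the walked stack.
    class DexPcTraceVisitor : public Thread::StackVisitor {
     public:
      virtual ~DexPcTraceVisitor() {}

      virtual void VisitFrame(const Frame& frame, uintptr_t pc) {
        // pc lies within frame's own method; WalkStack only advances to the
        // return PC after this visit.
        dex_pcs_.push_back(frame.GetMethod()->ToDexPC(pc));
      }

      std::vector<uint32_t> dex_pcs_;
    };

    // Usage: DexPcTraceVisitor visitor; thread->WalkStack(&visitor);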