Support hardware divide instruction

Bug: 11299025

Uses sdiv for division and a combination of sdiv, mul and sub for modulus.
This is only done on processors that support the sdiv instruction, as determined
by the build system.

Also provides a command line arg --instruction-set-features= to allow cross compilation.
The makefiles add the --instruction-set-features= arg to build-time dex2oat runs for the
target and default it to DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES, which the build system
derives from the target CPU variant.

Provides a GetInstructionSetFeatures() function on CompilerDriver that can be
queried for various features.  The only feature supported right now is HasDivideInstruction().

Also adds a few more instructions to the ARM disassembler.

b/11535253 is an addition to this CL to be done later.

Change-Id: Ia8aaf801fd94bc71e476902749cf20f74eba9f68
diff --git a/Android.mk b/Android.mk
index 0b4b231..3112ab0 100644
--- a/Android.mk
+++ b/Android.mk
@@ -270,7 +270,7 @@
 
 $$(OUT_OAT_FILE): $(PRODUCT_OUT)/$(1) $(TARGET_BOOT_IMG_OUT) $(DEX2OAT_DEPENDENCY)
 	@mkdir -p $$(dir $$@)
-	$(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms64m --runtime-arg -Xmx64m --boot-image=$(TARGET_BOOT_IMG_OUT) --dex-file=$(PRODUCT_OUT)/$(1) --dex-location=/$(1) --oat-file=$$@ --host-prefix=$(PRODUCT_OUT) --instruction-set=$(TARGET_ARCH) --android-root=$(PRODUCT_OUT)/system
+	$(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms64m --runtime-arg -Xmx64m --boot-image=$(TARGET_BOOT_IMG_OUT) --dex-file=$(PRODUCT_OUT)/$(1) --dex-location=/$(1) --oat-file=$$@ --host-prefix=$(PRODUCT_OUT) --instruction-set=$(TARGET_ARCH) --instruction-set-features=$(TARGET_INSTRUCTION_SET_FEATURES) --android-root=$(PRODUCT_OUT)/system
 
 endif
 
diff --git a/build/Android.executable.mk b/build/Android.executable.mk
index 5cf15be..b317d92 100644
--- a/build/Android.executable.mk
+++ b/build/Android.executable.mk
@@ -24,6 +24,10 @@
   ART_EXECUTABLES_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
 
+# add the default instruction set features
+ART_EXECUTABLES_CFLAGS += \
+	-DART_DEFAULT_INSTRUCTION_SET_FEATURES=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
+
 # $(1): executable ("d" will be appended for debug version)
 # $(2): source
 # $(3): extra shared libraries
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 655c7dd..0d759ce 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -86,6 +86,7 @@
 ifeq ($(ART_USE_PORTABLE_COMPILER),true)
   ART_TEST_CFLAGS += -DART_USE_PORTABLE_COMPILER=1
 endif
+ART_TEST_CFLAGS += -DART_DEFAULT_INSTRUCTION_SET_FEATURES=$(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
 
 # $(1): target or host
 # $(2): file name
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index b680b82..5d355a6 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -26,7 +26,7 @@
 
 # By default, do not run rerun dex2oat if the tool changes.
 # Comment out the | to force dex2oat to rerun on after all changes.
-DEX2OAT_DEPENDENCY := |
+DEX2OAT_DEPENDENCY := #|
 DEX2OAT_DEPENDENCY += $(DEX2OAT)
 DEX2OAT_DEPENDENCY += $(LIBART_COMPILER)
 
@@ -57,15 +57,26 @@
 HOST_CORE_IMG_OUT := $(HOST_OUT_JAVA_LIBRARIES)/core.art
 TARGET_CORE_IMG_OUT := $(ART_TEST_OUT)/core.art
 
+# DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES is set in ../build/core/dex_preopt.mk based on
+# the TARGET_CPU_VARIANT
+
+TARGET_INSTRUCTION_SET_FEATURES := $(DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES)
+
 $(HOST_CORE_IMG_OUT): $(HOST_CORE_DEX_FILES) $(DEX2OAT_DEPENDENCY)
 	@echo "host dex2oat: $@ ($?)"
 	@mkdir -p $(dir $@)
-	$(hide) $(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$(PRELOADED_CLASSES) $(addprefix --dex-file=,$(HOST_CORE_DEX_FILES)) $(addprefix --dex-location=,$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$(HOST_CORE_OAT_OUT) --oat-location=$(HOST_CORE_OAT) --image=$(HOST_CORE_IMG_OUT) --base=$(IMG_HOST_BASE_ADDRESS) --instruction-set=$(HOST_ARCH) --host --android-root=$(HOST_OUT)
+	$(hide) $(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$(PRELOADED_CLASSES) $(addprefix \
+		--dex-file=,$(HOST_CORE_DEX_FILES)) $(addprefix --dex-location=,$(HOST_CORE_DEX_LOCATIONS)) --oat-file=$(HOST_CORE_OAT_OUT) \
+		--oat-location=$(HOST_CORE_OAT) --image=$(HOST_CORE_IMG_OUT) --base=$(IMG_HOST_BASE_ADDRESS) \
+		--instruction-set=$(HOST_ARCH) --host --android-root=$(HOST_OUT)
 
 $(TARGET_CORE_IMG_OUT): $(TARGET_CORE_DEX_FILES) $(DEX2OAT_DEPENDENCY)
 	@echo "target dex2oat: $@ ($?)"
 	@mkdir -p $(dir $@)
-	$(hide) $(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$(PRELOADED_CLASSES) $(addprefix --dex-file=,$(TARGET_CORE_DEX_FILES)) $(addprefix --dex-location=,$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$(TARGET_CORE_OAT_OUT) --oat-location=$(TARGET_CORE_OAT) --image=$(TARGET_CORE_IMG_OUT) --base=$(IMG_TARGET_BASE_ADDRESS) --instruction-set=$(TARGET_ARCH) --host-prefix=$(PRODUCT_OUT) --android-root=$(PRODUCT_OUT)/system
+	$(hide) $(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms16m --runtime-arg -Xmx16m --image-classes=$(PRELOADED_CLASSES) $(addprefix \
+		--dex-file=,$(TARGET_CORE_DEX_FILES)) $(addprefix --dex-location=,$(TARGET_CORE_DEX_LOCATIONS)) --oat-file=$(TARGET_CORE_OAT_OUT) \
+		--oat-location=$(TARGET_CORE_OAT) --image=$(TARGET_CORE_IMG_OUT) --base=$(IMG_TARGET_BASE_ADDRESS) \
+		--instruction-set=$(TARGET_ARCH) --instruction-set-features=$(TARGET_INSTRUCTION_SET_FEATURES) --host-prefix=$(PRODUCT_OUT) --android-root=$(PRODUCT_OUT)/system
 
 $(HOST_CORE_OAT_OUT): $(HOST_CORE_IMG_OUT)
 
@@ -110,7 +121,10 @@
 	@echo "target dex2oat: $@ ($?)"
 	@mkdir -p $(dir $@)
 	@mkdir -p $(dir $(TARGET_BOOT_OAT_UNSTRIPPED_OUT))
-	$(hide) $(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms256m --runtime-arg -Xmx256m --image-classes=$(PRELOADED_CLASSES) $(addprefix --dex-file=,$(TARGET_BOOT_DEX_FILES)) $(addprefix --dex-location=,$(TARGET_BOOT_DEX_LOCATIONS)) --oat-symbols=$(TARGET_BOOT_OAT_UNSTRIPPED_OUT) --oat-file=$(TARGET_BOOT_OAT_OUT) --oat-location=$(TARGET_BOOT_OAT) --image=$(TARGET_BOOT_IMG_OUT) --base=$(IMG_TARGET_BASE_ADDRESS) --instruction-set=$(TARGET_ARCH) --host-prefix=$(PRODUCT_OUT) --android-root=$(PRODUCT_OUT)/system
+	$(hide) $(DEX2OAT) $(PARALLEL_ART_COMPILE_JOBS) --runtime-arg -Xms256m --runtime-arg -Xmx256m --image-classes=$(PRELOADED_CLASSES) $(addprefix --dex-file=,$(TARGET_BOOT_DEX_FILES)) $(addprefix --dex-location=,$(TARGET_BOOT_DEX_LOCATIONS)) \
+		--oat-symbols=$(TARGET_BOOT_OAT_UNSTRIPPED_OUT) --oat-file=$(TARGET_BOOT_OAT_OUT) \
+		--oat-location=$(TARGET_BOOT_OAT) --image=$(TARGET_BOOT_IMG_OUT) --base=$(IMG_TARGET_BASE_ADDRESS) \
+		--instruction-set=$(TARGET_ARCH) --instruction-set-features=$(TARGET_INSTRUCTION_SET_FEATURES) --host-prefix=$(PRODUCT_OUT) --android-root=$(PRODUCT_OUT)/system
 
 $(TARGET_BOOT_OAT_UNSTRIPPED_OUT): $(TARGET_BOOT_IMG_OUT)
 
diff --git a/compiler/dex/compiler_ir.h b/compiler/dex/compiler_ir.h
index 0d7209e..fd46975 100644
--- a/compiler/dex/compiler_ir.h
+++ b/compiler/dex/compiler_ir.h
@@ -97,6 +97,9 @@
   CompilerBackend compiler_backend;
   InstructionSet instruction_set;
 
+  const InstructionSetFeatures& GetInstructionSetFeatures() {
+    return compiler_driver->GetInstructionSetFeatures();
+  }
   // TODO: much of this info available elsewhere.  Go to the original source?
   uint16_t num_dalvik_registers;        // method->registers_size.
   const uint16_t* insns;
diff --git a/compiler/dex/quick/arm/arm_lir.h b/compiler/dex/quick/arm/arm_lir.h
index 2ff7f1c..ffaaf84 100644
--- a/compiler/dex/quick/arm/arm_lir.h
+++ b/compiler/dex/quick/arm/arm_lir.h
@@ -380,6 +380,8 @@
   kThumb2CmnRR,      // cmn [111010110001] rn[19..16] [0000] [1111] [0000] rm[3..0].
   kThumb2EorRRR,     // eor [111010101000] rn[19..16] [0000] rd[11..8] [0000] rm[3..0].
   kThumb2MulRRR,     // mul [111110110000] rn[19..16] [1111] rd[11..8] [0000] rm[3..0].
+  kThumb2SdivRRR,    // sdiv [111110111001] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
+  kThumb2UdivRRR,    // udiv [111110111011] rn[19..16] [1111] rd[11..8] [1111] rm[3..0].
   kThumb2MnvRR,      // mvn [11101010011011110] rd[11-8] [0000] rm[3..0].
   kThumb2RsubRRI8,   // rsub [111100011100] rn[19..16] [0000] rd[11..8] imm8[7..0].
   kThumb2NegRR,      // actually rsub rd, rn, #0.
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index e8c188c..3d0f263 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -687,6 +687,14 @@
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
                  "mul", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2SdivRRR,  0xfb90f0f0,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "sdiv", "!0C, !1C, !2C", 4, kFixupNone),
+    ENCODING_MAP(kThumb2UdivRRR,  0xfbb0f0f0,
+                 kFmtBitBlt, 11, 8, kFmtBitBlt, 19, 16, kFmtBitBlt, 3, 0,
+                 kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
+                 "udiv", "!0C, !1C, !2C", 4, kFixupNone),
     ENCODING_MAP(kThumb2MnvRR,  0xea6f0000,
                  kFmtBitBlt, 11, 8, kFmtBitBlt, 3, 0, kFmtShift, -1, -1,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE1,
diff --git a/compiler/dex/quick/arm/int_arm.cc b/compiler/dex/quick/arm/int_arm.cc
index 0a8cbf9..42bf3d4 100644
--- a/compiler/dex/quick/arm/int_arm.cc
+++ b/compiler/dex/quick/arm/int_arm.cc
@@ -466,14 +466,39 @@
 
 RegLocation ArmMir2Lir::GenDivRemLit(RegLocation rl_dest, int reg1, int lit,
                                      bool is_div) {
-  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm";
-  return rl_dest;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+
+  // Put the literal in a temp.
+  int lit_temp = AllocTemp();
+  LoadConstant(lit_temp, lit);
+  // Use the generic case for div/rem with arg2 in a register.
+  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
+  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
+  FreeTemp(lit_temp);
+
+  return rl_result;
 }
 
 RegLocation ArmMir2Lir::GenDivRem(RegLocation rl_dest, int reg1, int reg2,
                                   bool is_div) {
-  LOG(FATAL) << "Unexpected use of GenDivRem for Arm";
-  return rl_dest;
+  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
+  if (is_div) {
+    // Simple case, use sdiv instruction.
+    OpRegRegReg(kOpDiv, rl_result.low_reg, reg1, reg2);
+  } else {
+    // Remainder case, use the following code:
+    // temp = reg1 / reg2      - integer division
+    // temp = temp * reg2
+    // dest = reg1 - temp
+
+    int temp = AllocTemp();
+    OpRegRegReg(kOpDiv, temp, reg1, reg2);
+    OpRegReg(kOpMul, temp, reg2);
+    OpRegRegReg(kOpSub, rl_result.low_reg, reg1, temp);
+    FreeTemp(temp);
+  }
+
+  return rl_result;
 }
 
 bool ArmMir2Lir::GenInlinedMinMaxInt(CallInfo* info, bool is_min) {
diff --git a/compiler/dex/quick/arm/utility_arm.cc b/compiler/dex/quick/arm/utility_arm.cc
index 3ceeacf..d631cf7 100644
--- a/compiler/dex/quick/arm/utility_arm.cc
+++ b/compiler/dex/quick/arm/utility_arm.cc
@@ -395,6 +395,10 @@
       DCHECK_EQ(shift, 0);
       opcode = kThumb2MulRRR;
       break;
+    case kOpDiv:
+      DCHECK_EQ(shift, 0);
+      opcode = kThumb2SdivRRR;
+      break;
     case kOpOr:
       opcode = kThumb2OrrRRR;
       break;
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 2b3404a..df6493d 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -1307,6 +1307,7 @@
     }
     StoreValue(rl_dest, rl_result);
   } else {
+    bool done = false;      // Set to true if we happen to find a way to use a real instruction.
     if (cu_->instruction_set == kMips) {
       rl_src1 = LoadValue(rl_src1, kCoreReg);
       rl_src2 = LoadValue(rl_src2, kCoreReg);
@@ -1314,7 +1315,23 @@
           GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero);
       }
       rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
-    } else {
+      done = true;
+    } else if (cu_->instruction_set == kThumb2) {
+      if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        // Use ARM SDIV instruction for division.  For remainder we also need to
+        // calculate using a MUL and subtract.
+        rl_src1 = LoadValue(rl_src1, kCoreReg);
+        rl_src2 = LoadValue(rl_src2, kCoreReg);
+        if (check_zero) {
+            GenImmedCheck(kCondEq, rl_src2.low_reg, 0, kThrowDivZero);
+        }
+        rl_result = GenDivRem(rl_dest, rl_src1.low_reg, rl_src2.low_reg, op == kOpDiv);
+        done = true;
+      }
+    }
+
+    // If we haven't already generated the code use the callout function.
+    if (!done) {
       ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod);
       FlushAllRegs();   /* Send everything to home location */
       LoadValueDirectFixed(rl_src2, TargetReg(kArg1));
@@ -1323,7 +1340,7 @@
       if (check_zero) {
         GenImmedCheck(kCondEq, TargetReg(kArg1), 0, kThrowDivZero);
       }
-      // NOTE: callout here is not a safepoint
+      // NOTE: callout here is not a safepoint.
       CallHelper(r_tgt, func_offset, false /* not a safepoint */);
       if (op == kOpDiv)
         rl_result = GetReturn(false);
@@ -1561,11 +1578,24 @@
       if (HandleEasyDivRem(opcode, is_div, rl_src, rl_dest, lit)) {
         return;
       }
+
+      bool done = false;
       if (cu_->instruction_set == kMips) {
         rl_src = LoadValue(rl_src, kCoreReg);
         rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
-      } else {
-        FlushAllRegs();   /* Everything to home location */
+        done = true;
+      } else if (cu_->instruction_set == kThumb2) {
+        if (cu_->GetInstructionSetFeatures().HasDivideInstruction()) {
+          // Use ARM SDIV instruction for division.  For remainder we also need to
+          // calculate using a MUL and subtract.
+          rl_src = LoadValue(rl_src, kCoreReg);
+          rl_result = GenDivRemLit(rl_dest, rl_src.low_reg, lit, is_div);
+          done = true;
+        }
+      }
+
+      if (!done) {
+        FlushAllRegs();   /* Everything to home location. */
         LoadValueDirectFixed(rl_src, TargetReg(kArg0));
         Clobber(TargetReg(kArg0));
         ThreadOffset func_offset = QUICK_ENTRYPOINT_OFFSET(pIdivmod);
@@ -1583,7 +1613,7 @@
   }
   rl_src = LoadValue(rl_src, kCoreReg);
   rl_result = EvalLoc(rl_dest, kCoreReg, true);
-  // Avoid shifts by literal 0 - no support in Thumb.  Change to copy
+  // Avoid shifts by literal 0 - no support in Thumb.  Change to copy.
   if (shift_op && (lit == 0)) {
     OpRegCopy(rl_result.low_reg, rl_src.low_reg);
   } else {
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 783c322..4871e16 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -336,10 +336,12 @@
                                                std::string const& filename);
 
 CompilerDriver::CompilerDriver(CompilerBackend compiler_backend, InstructionSet instruction_set,
+                               InstructionSetFeatures instruction_set_features,
                                bool image, DescriptorSet* image_classes, size_t thread_count,
                                bool dump_stats)
     : compiler_backend_(compiler_backend),
       instruction_set_(instruction_set),
+      instruction_set_features_(instruction_set_features),
       freezing_constructor_lock_("freezing constructor lock"),
       compiled_classes_lock_("compiled classes lock"),
       compiled_methods_lock_("compiled method lock"),
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 9b9a884..9321f06 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -91,6 +91,7 @@
   // can assume will be in the image, with NULL implying all available
   // classes.
   explicit CompilerDriver(CompilerBackend compiler_backend, InstructionSet instruction_set,
+                          InstructionSetFeatures instruction_set_features,
                           bool image, DescriptorSet* image_classes,
                           size_t thread_count, bool dump_stats);
 
@@ -104,10 +105,14 @@
   void CompileOne(const mirror::ArtMethod* method, base::TimingLogger& timings)
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
 
-  InstructionSet GetInstructionSet() const {
+  const InstructionSet& GetInstructionSet() const {
     return instruction_set_;
   }
 
+  const InstructionSetFeatures& GetInstructionSetFeatures() const {
+    return instruction_set_features_;
+  }
+
   CompilerBackend GetCompilerBackend() const {
     return compiler_backend_;
   }
@@ -386,7 +391,8 @@
 
   CompilerBackend compiler_backend_;
 
-  InstructionSet instruction_set_;
+  const InstructionSet instruction_set_;
+  const InstructionSetFeatures instruction_set_features_;
 
   // All class references that require
   mutable ReaderWriterMutex freezing_constructor_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 815bca5..6213b45 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -76,7 +76,10 @@
   CompilerBackend compiler_backend = kQuick;
 #endif
   InstructionSet insn_set = kIsTargetBuild ? kThumb2 : kX86;
-  compiler_driver_.reset(new CompilerDriver(compiler_backend, insn_set, false, NULL, 2, true));
+
+  InstructionSetFeatures insn_features;
+  compiler_driver_.reset(new CompilerDriver(compiler_backend, insn_set,
+                                            insn_features, false, NULL, 2, true));
   jobject class_loader = NULL;
   if (kCompile) {
     base::TimingLogger timings("OatTest::WriteRead", false, false);
@@ -149,17 +152,19 @@
 TEST_F(OatTest, OatHeaderSizeCheck) {
   // If this test is failing and you have to update these constants,
   // it is time to update OatHeader::kOatVersion
-  EXPECT_EQ(72U, sizeof(OatHeader));
+  EXPECT_EQ(76U, sizeof(OatHeader));
   EXPECT_EQ(28U, sizeof(OatMethodOffsets));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
     InstructionSet instruction_set = kX86;
+    InstructionSetFeatures instruction_set_features;
     std::vector<const DexFile*> dex_files;
     uint32_t image_file_location_oat_checksum = 0;
     uint32_t image_file_location_oat_begin = 0;
     const std::string image_file_location;
     OatHeader oat_header(instruction_set,
+                         instruction_set_features,
                          &dex_files,
                          image_file_location_oat_checksum,
                          image_file_location_oat_begin,
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 28355bf..f3bb112 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -98,6 +98,7 @@
 size_t OatWriter::InitOatHeader() {
   // create the OatHeader
   oat_header_ = new OatHeader(compiler_driver_->GetInstructionSet(),
+                              compiler_driver_->GetInstructionSetFeatures(),
                               dex_files_,
                               image_file_location_oat_checksum_,
                               image_file_location_oat_begin_,
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 98c62ce..1472337 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -131,6 +131,10 @@
   UsageError("      Example: --instruction-set=x86");
   UsageError("      Default: arm");
   UsageError("");
+  UsageError("  --instruction-set-features=...,: Specify instruction set features");
+  UsageError("      Example: --instruction-set-features=div");
+  UsageError("      Default: default");
+  UsageError("");
   UsageError("  --compiler-backend=(Quick|QuickGBC|Portable): select compiler backend");
   UsageError("      set.");
   UsageError("      Example: --compiler-backend=Portable");
@@ -155,13 +159,15 @@
                      Runtime::Options& options,
                      CompilerBackend compiler_backend,
                      InstructionSet instruction_set,
+                     InstructionSetFeatures instruction_set_features,
                      size_t thread_count)
       SHARED_TRYLOCK_FUNCTION(true, Locks::mutator_lock_) {
     if (!CreateRuntime(options, instruction_set)) {
       *p_dex2oat = NULL;
       return false;
     }
-    *p_dex2oat = new Dex2Oat(Runtime::Current(), compiler_backend, instruction_set, thread_count);
+    *p_dex2oat = new Dex2Oat(Runtime::Current(), compiler_backend, instruction_set,
+        instruction_set_features, thread_count);
     return true;
   }
 
@@ -257,6 +263,7 @@
 
     UniquePtr<CompilerDriver> driver(new CompilerDriver(compiler_backend_,
                                                         instruction_set_,
+                                                        instruction_set_features_,
                                                         image,
                                                         image_classes.release(),
                                                         thread_count_,
@@ -330,9 +337,11 @@
   explicit Dex2Oat(Runtime* runtime,
                    CompilerBackend compiler_backend,
                    InstructionSet instruction_set,
+                   InstructionSetFeatures instruction_set_features,
                    size_t thread_count)
       : compiler_backend_(compiler_backend),
         instruction_set_(instruction_set),
+        instruction_set_features_(instruction_set_features),
         runtime_(runtime),
         thread_count_(thread_count),
         start_ns_(NanoTime()) {
@@ -391,6 +400,7 @@
   const CompilerBackend compiler_backend_;
 
   const InstructionSet instruction_set_;
+  const InstructionSetFeatures instruction_set_features_;
 
   Runtime* runtime_;
   size_t thread_count_;
@@ -559,6 +569,32 @@
 const unsigned int WatchDog::kWatchDogWarningSeconds;
 const unsigned int WatchDog::kWatchDogTimeoutSeconds;
 
+// Parses a set of instruction set features, as supplied by the build or on
+// the command line.  The input 'str' is a comma separated list of feature
+// names; each name is trimmed and applied in order to the returned object.
+static InstructionSetFeatures ParseFeatureList(const std::string& str) {
+  InstructionSetFeatures result;
+  typedef std::vector<std::string> FeatureList;
+  FeatureList features;
+  Split(str, ',', features);
+  for (FeatureList::iterator i = features.begin(); i != features.end(); ++i) {
+    const std::string feature = Trim(*i);
+    if (feature == "default") {
+      // Nothing to do.
+    } else if (feature == "div") {
+      // Supports divide instruction.
+      result.SetHasDivideInstruction(true);
+    } else if (feature == "nodiv") {
+      // Turn off support for divide instruction.
+      result.SetHasDivideInstruction(false);
+    } else {
+      Usage("Unknown instruction set feature: '%s'", feature.c_str());
+    }
+  }
+  // Further feature names are handled by adding branches to the chain above.
+  return result;
+}
+
 static int dex2oat(int argc, char** argv) {
   base::TimingLogger timings("compiler", false, false);
 
@@ -595,6 +631,15 @@
 #else
   CompilerBackend compiler_backend = kQuick;
 #endif
+
+  // Take the default set of instruction features from the build if present.
+  InstructionSetFeatures instruction_set_features =
+#ifdef ART_DEFAULT_INSTRUCTION_SET_FEATURES
+    ParseFeatureList(STRINGIFY(ART_DEFAULT_INSTRUCTION_SET_FEATURES));
+#else
+    ParseFeatureList("default");
+#endif
+
 #if defined(__arm__)
   InstructionSet instruction_set = kThumb2;
 #elif defined(__i386__)
@@ -604,6 +649,8 @@
 #else
 #error "Unsupported architecture"
 #endif
+
+
   bool is_host = false;
   bool dump_stats = false;
   bool dump_timing = false;
@@ -678,6 +725,9 @@
       } else if (instruction_set_str == "x86") {
         instruction_set = kX86;
       }
+    } else if (option.starts_with("--instruction-set-features=")) {
+      StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
+      instruction_set_features = ParseFeatureList(str.as_string());
     } else if (option.starts_with("--compiler-backend=")) {
       StringPiece backend_str = option.substr(strlen("--compiler-backend=")).data();
       if (backend_str == "Quick") {
@@ -870,7 +920,8 @@
 #endif
 
   Dex2Oat* p_dex2oat;
-  if (!Dex2Oat::Create(&p_dex2oat, options, compiler_backend, instruction_set, thread_count)) {
+  if (!Dex2Oat::Create(&p_dex2oat, options, compiler_backend, instruction_set,
+      instruction_set_features, thread_count)) {
     LOG(ERROR) << "Failed to create dex2oat";
     return EXIT_FAILURE;
   }
@@ -1093,8 +1144,6 @@
 
   return EXIT_SUCCESS;
 }
-
-
 }  // namespace art
 
 int main(int argc, char** argv) {
diff --git a/disassembler/disassembler_arm.cc b/disassembler/disassembler_arm.cc
index 6239e9a..8d4f3ce 100644
--- a/disassembler/disassembler_arm.cc
+++ b/disassembler/disassembler_arm.cc
@@ -374,7 +374,102 @@
         // uint32_t op5 = (instr >> 4) & 0xF;
         ArmRegister Rn(instr, 16);
         ArmRegister Rt(instr, 12);
+        ArmRegister Rd(instr, 8);
         uint32_t imm8 = instr & 0xFF;
+        if ((op3 & 2) == 2) {     // 1x
+          int W = (instr >> 21) & 1;
+          int U = (instr >> 23) & 1;
+          int P = (instr >> 24) & 1;
+
+          if ((op4 & 1) == 1) {
+            opcode << "ldrd";
+          } else {
+            opcode << "strd";
+          }
+          args << Rt << "," << Rd << ", [" << Rn;
+          const char *sign = U ? "+" : "-";
+          if (P == 0 && W == 1) {
+            args << "], #" << sign << imm8;
+          } else {
+            args << ", #" << sign << imm8 << "]";
+            if (W == 1) {
+              args << "!";
+            }
+          }
+        } else {                  // 0x
+          switch (op4) {
+            case 0:
+              if (op3 == 0) {   // op3 is 00, op4 is 00
+                opcode << "strex";
+                args << Rd << ", " << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]";
+              } else {          // op3 is 01, op4 is 00
+                // this is one of strexb, strexh or strexd
+                int op5 = (instr >> 4) & 0xf;
+                switch (op5) {
+                  case 4:
+                    opcode << "strexb";
+                    break;
+                  case 5:
+                    opcode << "strexh";
+                    break;
+                  case 7:
+                    opcode << "strexd";
+                    break;
+                }
+              }
+              break;
+            case 1:
+              if (op3 == 0) {   // op3 is 00, op4 is 01
+                opcode << "ldrex";
+                args << Rt << ", [" << Rn << ", #" << (imm8 << 2) << "]";
+              } else {          // op3 is 01, op4 is 01
+                // this is one of tbb, tbh, ldrexb, ldrexh or ldrexd, selected by op5
+                int op5 = (instr >> 4) & 0xf;
+                switch (op5) {
+                  case 0:
+                    opcode << "tbb";
+                    break;
+                  case 1:
+                    opcode << "tbh";
+                    break;
+                  case 4:
+                    opcode << "ldrexb";
+                    break;
+                  case 5:
+                    opcode << "ldrexh";
+                    break;
+                  case 7:
+                    opcode << "ldrexd";
+                    break;
+                }
+              }
+              break;
+            case 2:     // op3 is 0x, op4 is 10
+            case 3:   // op3 is 0x, op4 is 11
+              if (op4 == 2) {
+                opcode << "strd";
+              } else {
+                opcode << "ldrd";
+              }
+              int W = (instr >> 21) & 1;
+              int U = (instr >> 23) & 1;
+              int P = (instr >> 24) & 1;
+
+              args << Rt << "," << Rd << ", [" << Rn;
+              const char *sign = U ? "+" : "-";
+              if (P == 0 && W == 1) {
+                args << "], #" << sign << imm8;
+              } else {
+                args << ", #" << sign << imm8 << "]";
+                if (W == 1) {
+                  args << "!";
+                }
+              }
+              break;
+          }
+        }
+
+
         if (op3 == 0 && op4 == 0) {  // STREX
           ArmRegister Rd(instr, 8);
           opcode << "strex";
@@ -519,19 +614,11 @@
         uint32_t op3 = (instr >> 20) & 0x3F;
         uint32_t coproc = (instr >> 8) & 0xF;
         uint32_t op4 = (instr >> 4) & 0x1;
-        if ((op3 == 2 || op3 == 2 || op3 == 6 || op3 == 7) ||  // 00x1x
-            (op3 >= 8 && op3 <= 15) || (op3 >= 16 && op3 <= 31)) {  // 001xxx, 01xxxx
-          // Extension register load/store instructions
-          // |111|1|110|00000|0000|1111|110|000000000|
-          // |5 3|2|109|87654|3  0|54 2|10 |87 54   0|
-          // |---|-|---|-----|----|----|---|---------|
-          // |332|2|222|22222|1111|1111|110|000000000|
-          // |1 9|8|765|43210|9  6|54 2|10 |87 54   0|
-          // |---|-|---|-----|----|----|---|---------|
-          // |111|T|110| op3 | Rn |    |101|         |
-          //  111 0 110 01001 0011 0000 101 000000011 - ec930a03
-          if (op3 == 9 || op3 == 0xD) {  // VLDM
-            //  1110 110 PUDW1 nnnn dddd 101S iiii iiii
+
+        if (coproc == 10 || coproc == 11) {   // 101x
+          if (op3 < 0x20 && (op3 >> 1) != 2) {     // 0xxxxx and not 00010x
+            // extension load/store instructions
+            int op = op3 & 0x1f;
             uint32_t P = (instr >> 24) & 1;
             uint32_t U = (instr >> 23) & 1;
             uint32_t D = (instr >> 22) & 1;
@@ -541,20 +628,49 @@
             uint32_t Vd = (instr >> 12) & 0xF;
             uint32_t imm8 = instr & 0xFF;
             uint32_t d = (S == 0 ? ((Vd << 1) | D) : (Vd | (D << 4)));
-            if (P == 0 && U == 0 && W == 0) {
-              // TODO: 64bit transfers between ARM core and extension registers.
-            } else if (P == 0 && U == 1 && Rn.r == 13) {  // VPOP
-              opcode << "vpop" << (S == 0 ? ".f64" : ".f32");
-              args << d << " .. " << (d + imm8);
-            } else if (P == 1 && W == 0) {  // VLDR
-              opcode << "vldr" << (S == 0 ? ".f64" : ".f32");
-              args << d << ", [" << Rn << ", #" << imm8 << "]";
-            } else {  // VLDM
-              opcode << "vldm" << (S == 0 ? ".f64" : ".f32");
-              args << Rn << ", " << d << " .. " << (d + imm8);
+            ArmRegister Rd(d, 0);
+
+            if (op == 8 || op == 12 || op == 10 || op == 14 ||
+                op == 18 || op == 22) {   // 01x00, 01x10 or 10x10
+              // vector store multiple; vpush is the decrement-before form on sp
+              if (P == 1 && U == 0 && W == 1 && Rn.r == 13) {
+                opcode << "vpush" << (S == 0 ? ".f64" : ".f32");
+                args << Rd << " .. " << (Rd.r + imm8);
+              } else {
+                opcode << "vstm" << (S == 0 ? ".f64" : ".f32");
+                args << Rn << ", " << Rd << " .. " << (Rd.r + imm8);
+              }
+            } else if (op == 16 || op == 20 || op == 24 || op == 28) {
+              // 1xx00
+              // vector store register
+              opcode << "vstr" << (S == 0 ? ".f64" : ".f32");
+              args << Rd << ", [" << Rn << ", #" << imm8 << "]";
+            } else if (op == 17 || op == 21 || op == 25 || op == 29) {
+              // 1xx01
+              // vector load register
+              opcode << "vldr" << (S == 0 ? ".f64" : ".f32");
+              args << Rd << ", [" << Rn << ", #" << imm8 << "]";
+            } else if (op == 9 || op == 13 || op == 11 || op == 15 ||
+                op == 19 || op == 23) {    // 01x01, 01x11 or 10x11
+              // vector load multiple; vpop is the increment-after writeback form on sp
+              if (P == 0 && U == 1 && W == 1 && Rn.r == 13) {
+                opcode << "vpop" << (S == 0 ? ".f64" : ".f32");
+                args << Rd << " .. " << (Rd.r + imm8);
+              } else {
+                opcode << "vldm" << (S == 0 ? ".f64" : ".f32");
+                args << Rn << ", " << Rd << " .. " << (Rd.r + imm8);
+              }
             }
+          } else if ((op3 >> 1) == 2) {      // 00010x
+            // 64 bit transfers
+          } else if ((op3 >> 4) == 2 && op4 == 0) {     // 10xxxx, op = 0
+            // fp data processing
+          } else if ((op3 >> 4) == 2 && op4 == 1) {     // 10xxxx, op = 1
+            // 8,16,32 bit transfers
           }
-        } else if ((op3 & 0x30) == 0x20 && op4 == 0) {  // 10 xxxx ... 0
+        }
+
+        if ((op3 & 0x30) == 0x20 && op4 == 0) {  // 10 xxxx ... 0
           if ((coproc & 0xE) == 0xA) {
             // VFP data-processing instructions
             // |111|1|1100|0000|0000|1111|110|0|00  |0|0|0000|
@@ -1070,6 +1186,72 @@
           }
           break;
         }
+      default:      // more formats
+        if ((op2 >> 4) == 2) {      // 010xxxx
+          // data processing (register)
+        } else if ((op2 >> 3) == 6) {       // 0110xxx
+          // Multiply, multiply accumulate, and absolute difference
+          op1 = (instr >> 20) & 0x7;
+          op2 = (instr >> 4) & 0x3;         // two-bit field in bits 5:4; mls sets bit 4
+          ArmRegister Ra(instr, 12);
+          ArmRegister Rn(instr, 16);
+          ArmRegister Rm(instr, 0);
+          ArmRegister Rd(instr, 8);
+          switch (op1) {
+          case 0:
+            if (op2 == 0) {
+              if (Ra.r == 0xf) {            // no accumulator register: plain multiply
+                opcode << "mul";
+                args << Rd << ", " << Rn << ", " << Rm;
+              } else {
+                opcode << "mla";
+                args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+              }
+            } else if (op2 == 1) {          // other op2 values are left undecoded
+              opcode << "mls";
+              args << Rd << ", " << Rn << ", " << Rm << ", " << Ra;
+            }
+            break;
+          case 1:
+          case 2:
+          case 3:
+          case 4:
+          case 5:
+          case 6:
+            break;        // do these sometime
+          }
+        } else if ((op2 >> 3) == 7) {       // 0111xxx
+          // Long multiply, long multiply accumulate, and divide
+          op1 = (instr >> 20) & 0x7;
+          op2 = (instr >> 4) & 0xf;
+          ArmRegister Rn(instr, 16);
+          ArmRegister Rm(instr, 0);
+          ArmRegister Rd(instr, 8);
+          ArmRegister RdHi(instr, 8);
+          ArmRegister RdLo(instr, 12);
+          switch (op1) {
+          case 0:
+            opcode << "smull";
+            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+            break;
+          case 1:
+            opcode << "sdiv";
+            args << Rd << ", " << Rn << ", " << Rm;
+            break;
+          case 2:
+            opcode << "umull";
+            args << RdLo << ", " << RdHi << ", " << Rn << ", " << Rm;
+            break;
+          case 3:
+            opcode << "udiv";
+            args << Rd << ", " << Rn << ", " << Rm;
+            break;
+          case 4:
+          case 5:
+          case 6:
+            break;      // TODO: when we generate these...
+          }
+        }
       }
     default:
       break;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 3a32ff1..b9716d5 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -118,6 +118,9 @@
     os << "INSTRUCTION SET:\n";
     os << oat_header.GetInstructionSet() << "\n\n";
 
+    os << "INSTRUCTION SET FEATURES:\n";
+    os << oat_header.GetInstructionSetFeatures().GetFeatureString() << "\n\n";
+
     os << "DEX FILE COUNT:\n";
     os << oat_header.GetDexFileCount() << "\n\n";
 
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 3d275e6..bef4381 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -187,6 +187,7 @@
 	arch/arm/jni_entrypoints_arm.S \
 	arch/arm/portable_entrypoints_arm.S \
 	arch/arm/quick_entrypoints_arm.S \
+	arch/arm/arm_sdiv.S \
 	arch/arm/thread_arm.cc
 else # TARGET_ARCH != arm
 ifeq ($(TARGET_ARCH),x86)
diff --git a/runtime/arch/arm/arm_sdiv.S b/runtime/arch/arm/arm_sdiv.S
new file mode 100644
index 0000000..925e428
--- /dev/null
+++ b/runtime/arch/arm/arm_sdiv.S
@@ -0,0 +1,24 @@
+// This function is used to check for the CPU's support for the sdiv
+// instruction at runtime.  It will either return the value 1 or
+// will cause an invalid instruction trap (SIGILL signal).  The
+// caller must arrange for the signal handler to set the r0
+// register to 0 and move the pc forward by 4 bytes (to skip
+// the invalid instruction).
+
+
+#include "asm_support_arm.S"
+
+.section .text
+ENTRY CheckForARMSDIVInstruction
+  mov r1,#1
+  // depending on the architecture, the assembler will not allow an
+  // sdiv instruction, so we will have to output the bytes directly.
+
+  // sdiv r0,r1,r1 is two halfwords: 0xfb91 0xf0f1, each emitted little endian.
+  .byte 0x91,0xfb,0xf1,0xf0
+
+  // if the divide worked, r0 will have the value #1 (result of sdiv).
+  // It will have 0 otherwise (set by the signal handler)
+  // the value is just returned from this function.
+  bx lr
+  END CheckForARMSDIVInstruction
diff --git a/runtime/base/macros.h b/runtime/base/macros.h
index d00c64a..00a530a 100644
--- a/runtime/base/macros.h
+++ b/runtime/base/macros.h
@@ -130,6 +130,10 @@
 #define LIKELY(x)       __builtin_expect((x), true)
 #define UNLIKELY(x)     __builtin_expect((x), false)
 
+// QUOTE stringifies x verbatim; STRINGIFY macro-expands x first, then stringifies it.
+#define QUOTE(x) #x
+#define STRINGIFY(x) QUOTE(x)
+
 #ifndef NDEBUG
 #define ALWAYS_INLINE
 #else
diff --git a/runtime/common_test.h b/runtime/common_test.h
index 673a03b..79fa680 100644
--- a/runtime/common_test.h
+++ b/runtime/common_test.h
@@ -22,6 +22,7 @@
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>
+#include <fstream>
 
 #include "../../external/icu4c/common/unicode/uvernum.h"
 #include "base/macros.h"
@@ -152,6 +153,113 @@
   UniquePtr<File> file_;
 };
 
+#if defined(__arm__)
+
+
+#include <signal.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+
+
+// Signal handler called when we hit an illegal instruction.  We record the fact by
+// zeroing r0 in the saved signal context and then advance the saved PC so that we
+// return to the next instruction.  We know the instruction is an sdiv (4 bytes long).
+static void baddivideinst(int signo, siginfo *si, void *data) {
+  (void)signo;
+  (void)si;
+  struct ucontext *uc = (struct ucontext *)data;
+  struct sigcontext *sc = &uc->uc_mcontext;
+  sc->arm_r0 = 0;     // set R0 to #0 to signal error
+  sc->arm_pc += 4;    // skip offending instruction
+}
+
+// This is in arch/arm/arm_sdiv.S.  It does the following:
+// mov r1,#1
+// sdiv r0,r1,r1
+// bx lr
+//
+// the result will be the value 1 if sdiv is supported.  If it is not supported
+// a SIGILL signal will be raised and the signal handler (baddivideinst) called.
+// The signal handler sets r0 to #0 and then increments pc beyond the failed instruction.
+// Thus if the instruction is not supported, the result of this function will be #0
+
+extern "C" bool CheckForARMSDIVInstruction();
+
+// Probes the CPU we are running on and returns the instruction set features it supports.
+static InstructionSetFeatures GuessInstructionFeatures() {
+  InstructionSetFeatures f;
+
+  // Change this to true to enable processing of /proc/cpuinfo.
+  if (false) {
+    // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+    // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+    std::ifstream in("/proc/cpuinfo");
+    if (in) {
+      // Scan every line.  The stream has to stay open until the loop is done (closing it
+      // inside the loop stops after the first line); its destructor closes the file.
+      std::string line;
+      while (std::getline(in, line)) {
+        if (line.find("Features") != std::string::npos &&
+            line.find("idivt") != std::string::npos) {
+          f.SetHasDivideInstruction(true);
+        }
+      }
+    } else {
+      LOG(INFO) << "Failed to open /proc/cpuinfo";
+    }
+  }
+
+  // See if we have an sdiv instruction.  Register a signal handler and try to execute
+  // an sdiv instruction.  If we get a SIGILL then it's not supported.  We can't use
+  // the /proc/cpuinfo method for this because Krait devices don't always put the idivt
+  // feature in the list.
+  struct sigaction sa, osa;
+  // Block no extra signals while the handler runs; sa_mask would otherwise be garbage.
+  sigemptyset(&sa.sa_mask);
+  sa.sa_flags = SA_ONSTACK | SA_RESTART | SA_SIGINFO;
+  sa.sa_sigaction = baddivideinst;
+  sigaction(SIGILL, &sa, &osa);
+
+  if (CheckForARMSDIVInstruction()) {
+    f.SetHasDivideInstruction(true);
+  }
+
+  // Restore the signal handler.
+  sigaction(SIGILL, &osa, NULL);
+
+  // Other feature guesses in here.
+  return f;
+}
+
+#endif
+
+// Converts the build-supplied feature string into an InstructionSetFeatures
+// object.  'str' is a comma separated list of feature names; each name is
+// trimmed and must be one of "default", "div" or "nodiv".
+static InstructionSetFeatures ParseFeatureList(std::string str) {
+  LOG(INFO) << "Parsing features " << str;
+  std::vector<std::string> names;
+  Split(str, ',', names);
+
+  InstructionSetFeatures parsed;
+  // Apply the names in list order, so a later entry overrides an earlier one.
+  for (size_t idx = 0; idx < names.size(); ++idx) {
+    const std::string name = Trim(names[idx]);
+    if (name == "div") {
+      // The divide instruction is available.
+      parsed.SetHasDivideInstruction(true);
+    } else if (name == "nodiv") {
+      // The divide instruction must not be used.
+      parsed.SetHasDivideInstruction(false);
+    } else if (name != "default") {
+      // "default" changes nothing; any other name is rejected.
+      LOG(FATAL) << "Unknown instruction set feature: '" << name << "'";
+    }
+  }
+  // Parsing of further features goes here.
+  return parsed;
+}
+
 class CommonTest : public testing::Test {
  public:
   static void MakeExecutable(const mirror::ByteArray* code_array) {
@@ -314,8 +422,22 @@
       class_linker_ = runtime_->GetClassLinker();
 
       InstructionSet instruction_set = kNone;
+
+      // take the default set of instruction features from the build if present
+      InstructionSetFeatures instruction_set_features =
+#ifdef ART_DEFAULT_INSTRUCTION_SET_FEATURES
+        ParseFeatureList(STRINGIFY(ART_DEFAULT_INSTRUCTION_SET_FEATURES));
+#else
+        ParseFeatureList("default");
+#endif
+
 #if defined(__arm__)
       instruction_set = kThumb2;
+      InstructionSetFeatures runtime_features = GuessInstructionFeatures();
+
+      // for ARM, do a runtime check to make sure that the features we are passed from
+      // the build match the features we actually determine at runtime.
+      ASSERT_EQ(instruction_set_features, runtime_features);
 #elif defined(__mips__)
       instruction_set = kMips;
 #elif defined(__i386__)
@@ -338,6 +460,7 @@
       }
       class_linker_->FixupDexCaches(runtime_->GetResolutionMethod());
       compiler_driver_.reset(new CompilerDriver(compiler_backend, instruction_set,
+                                                instruction_set_features,
                                                 true, new CompilerDriver::DescriptorSet,
                                                 2, true));
     }
@@ -568,7 +691,6 @@
 #else
 #define TEST_DISABLED_FOR_PORTABLE()
 #endif
-
 }  // namespace art
 
 namespace std {
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 2217f7f..aee7447 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -18,6 +18,9 @@
 #define ART_RUNTIME_INSTRUCTION_SET_H_
 
 #include <iosfwd>
+#include <string>
+
+#include "base/macros.h"
 
 namespace art {
 
@@ -29,6 +32,53 @@
   kMips
 };
 
+enum InstructionFeatures {
+  kHwDiv = 1                  // Supports hardware divide.
+};
+
+// Set of optional CPU features, stored as a bitmask of InstructionFeatures values.
+class PACKED(4) InstructionSetFeatures {
+ public:
+  InstructionSetFeatures() : mask_(0) {}
+  explicit InstructionSetFeatures(uint32_t mask) : mask_(mask) {}
+
+  // True when the kHwDiv bit is set in the mask.
+  bool HasDivideInstruction() const {
+    return (mask_ & kHwDiv) != 0;
+  }
+
+  // Sets (v == true) or clears (v == false) the kHwDiv bit; other bits are kept.
+  void SetHasDivideInstruction(bool v) {
+    mask_ = v ? (mask_ | kHwDiv) : (mask_ & ~kHwDiv);
+  }
+
+  // Returns "div" when the divide bit is set and "none" otherwise.
+  std::string GetFeatureString() const {
+    return HasDivideInstruction() ? "div" : "none";
+  }
+
+  // Raw feature bitmask.
+  uint32_t get_mask() const {
+    return mask_;
+  }
+
+  // Other features in here.
+
+  // Feature sets compare equal exactly when their bitmasks match.
+  bool operator==(const InstructionSetFeatures &peer) const {
+    return get_mask() == peer.get_mask();
+  }
+
+  // Negation of operator==.
+  bool operator!=(const InstructionSetFeatures &peer) const {
+    return !(*this == peer);
+  }
+
+ private:
+  // Bitwise OR of InstructionFeatures values.
+  uint32_t mask_;
+};
+
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
 }  // namespace art
diff --git a/runtime/oat.cc b/runtime/oat.cc
index defda6b..9489795 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,13 +22,14 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '0', '9', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '1', '0', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
 }
 
 OatHeader::OatHeader(InstructionSet instruction_set,
+                     const InstructionSetFeatures& instruction_set_features,
                      const std::vector<const DexFile*>* dex_files,
                      uint32_t image_file_location_oat_checksum,
                      uint32_t image_file_location_oat_data_begin,
@@ -42,6 +43,9 @@
   instruction_set_ = instruction_set;
   UpdateChecksum(&instruction_set_, sizeof(instruction_set_));
 
+  instruction_set_features_ = instruction_set_features;
+  UpdateChecksum(&instruction_set_features_, sizeof(instruction_set_features_));
+
   dex_file_count_ = dex_files->size();
   UpdateChecksum(&dex_file_count_, sizeof(dex_file_count_));
 
@@ -99,6 +103,11 @@
   return instruction_set_;
 }
 
+const InstructionSetFeatures& OatHeader::GetInstructionSetFeatures() const {
+  CHECK(IsValid());  // Validate the header before handing out its feature set.
+  return instruction_set_features_;
+}
+
 uint32_t OatHeader::GetExecutableOffset() const {
   DCHECK(IsValid());
   DCHECK_ALIGNED(executable_offset_, kPageSize);
diff --git a/runtime/oat.h b/runtime/oat.h
index c864c2c..de840b5 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,6 +32,7 @@
 
   OatHeader();
   OatHeader(InstructionSet instruction_set,
+            const InstructionSetFeatures& instruction_set_features,
             const std::vector<const DexFile*>* dex_files,
             uint32_t image_file_location_oat_checksum,
             uint32_t image_file_location_oat_data_begin,
@@ -80,6 +81,7 @@
   void SetQuickToInterpreterBridgeOffset(uint32_t offset);
 
   InstructionSet GetInstructionSet() const;
+  const InstructionSetFeatures& GetInstructionSetFeatures() const;
   uint32_t GetImageFileLocationOatChecksum() const;
   uint32_t GetImageFileLocationOatDataBegin() const;
   uint32_t GetImageFileLocationSize() const;
@@ -92,6 +94,7 @@
   uint32_t adler32_checksum_;
 
   InstructionSet instruction_set_;
+  InstructionSetFeatures instruction_set_features_;
   uint32_t dex_file_count_;
   uint32_t executable_offset_;
   uint32_t interpreter_to_interpreter_bridge_offset_;
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 9796b99..e039581 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -889,6 +889,35 @@
   }
 }
 
+// Returns a copy of 's' with leading and trailing whitespace (as classified by
+// isspace) removed.  An empty or all-whitespace input yields the empty string.
+std::string Trim(std::string s) {
+  // Track the kept text as the half-open range [start_index, end_index).  An inclusive
+  // end of "size() - 1" wraps around for an empty string and indexes out of bounds.
+  size_t start_index = 0;
+  size_t end_index = s.size();
+
+  // Skip initial whitespace.  isspace needs an unsigned char value; passing a negative
+  // char is undefined behavior.
+  while (start_index < end_index &&
+         isspace(static_cast<unsigned char>(s[start_index]))) {
+    start_index++;
+  }
+
+  // Skip terminating whitespace.
+  while (end_index > start_index &&
+         isspace(static_cast<unsigned char>(s[end_index - 1]))) {
+    end_index--;
+  }
+
+  // All spaces (or empty input), no beef.
+  if (start_index == end_index) {
+    return "";
+  }
+  // start_index is the first non-space, end_index is one past the last one.
+  return s.substr(start_index, end_index - start_index);
+}
+
 template <typename StringT>
 std::string Join(std::vector<StringT>& strings, char separator) {
   if (strings.empty()) {
diff --git a/runtime/utils.h b/runtime/utils.h
index 51035b6..6850e8b 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -252,7 +252,7 @@
 // Get the appropriate unit for a nanosecond duration.
 TimeUnit GetAppropriateTimeUnit(uint64_t nano_duration);
 
-// Get the divisor to convert from a nanoseconds to a time unit
+// Get the divisor to convert from a nanoseconds to a time unit.
 uint64_t GetNsToTimeUnitDivisor(TimeUnit time_unit);
 
 // Performs JNI name mangling as described in section 11.3 "Linking Native Methods"
@@ -326,6 +326,9 @@
 // strings. Empty strings will be omitted.
 void Split(const std::string& s, char separator, std::vector<std::string>& result);
 
+// Trims whitespace off both ends of the given string.
+std::string Trim(std::string s);
+
 // Joins a vector of strings into a single string, using the given separator.
 template <typename StringT> std::string Join(std::vector<StringT>& strings, char separator);
 
@@ -354,10 +357,10 @@
 // Dumps the kernel stack for thread 'tid' to 'os'. Note that this is only available on linux-x86.
 void DumpKernelStack(std::ostream& os, pid_t tid, const char* prefix = "", bool include_count = true);
 
-// Find $ANDROID_ROOT, /system, or abort
+// Find $ANDROID_ROOT, /system, or abort.
 const char* GetAndroidRoot();
 
-// Find $ANDROID_DATA, /data, or abort
+// Find $ANDROID_DATA, /data, or abort.
 const char* GetAndroidData();
 
 // Returns the dalvik-cache location, or dies trying.