Support passing arguments to invoke-static* instructions.

- Stop using the frame pointer for accessing locals.
- Stop emulating a stack when doing code generation. Instead,
  rely on the dex register model, where instructions only reference
  registers.

Change-Id: Id51bd7d33ac430cb87a53c9f4b0c864eeb1006f9
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 64ecdb5..0554876 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -37,8 +37,6 @@
 static bool CanHandleCodeItem(const DexFile::CodeItem& code_item) {
   if (code_item.tries_size_ > 0) {
     return false;
-  } else if (code_item.outs_size_ > 0) {
-    return false;
   } else if (code_item.ins_size_ > 0) {
     return false;
   }
@@ -62,6 +60,7 @@
   graph_->SetExitBlock(exit_block_);
 
   InitializeLocals(code_item.registers_size_);
+  graph_->UpdateMaximumNumberOfOutVRegs(code_item.outs_size_);
 
   // To avoid splitting blocks, we compute ahead of time the instructions that
   // start a new block, and create these blocks.
@@ -200,14 +199,49 @@
       uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
       const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
       const size_t number_of_arguments = instruction.VRegA_35c();
-      if (number_of_arguments != 0) {
-        return false;
-      }
+
       if (Primitive::GetType(descriptor[0]) != Primitive::kPrimVoid) {
         return false;
       }
-      current_block_->AddInstruction(new (arena_) HInvokeStatic(
-          arena_, number_of_arguments, dex_offset, method_idx));
+
+      HInvokeStatic* invoke = new (arena_) HInvokeStatic(
+          arena_, number_of_arguments, dex_offset, method_idx);
+
+      uint32_t args[5];
+      instruction.GetArgs(args);
+
+      for (size_t i = 0; i < number_of_arguments; i++) {
+        HInstruction* arg = LoadLocal(args[i]);
+        HInstruction* push = new (arena_) HPushArgument(arg, i);
+        current_block_->AddInstruction(push);
+        invoke->SetArgumentAt(i, push);
+      }
+
+      current_block_->AddInstruction(invoke);
+      break;
+    }
+
+    case Instruction::INVOKE_STATIC_RANGE: {
+      uint32_t method_idx = instruction.VRegB_3rc();
+      const DexFile::MethodId& method_id = dex_file_->GetMethodId(method_idx);
+      uint32_t return_type_idx = dex_file_->GetProtoId(method_id.proto_idx_).return_type_idx_;
+      const char* descriptor = dex_file_->StringByTypeIdx(return_type_idx);
+      const size_t number_of_arguments = instruction.VRegA_3rc();
+
+      if (Primitive::GetType(descriptor[0]) != Primitive::kPrimVoid) {
+        return false;
+      }
+
+      HInvokeStatic* invoke = new (arena_) HInvokeStatic(
+          arena_, number_of_arguments, dex_offset, method_idx);
+      int32_t register_index = instruction.VRegC();
+      for (size_t i = 0; i < number_of_arguments; i++) {
+        HInstruction* arg = LoadLocal(register_index + i);
+        HInstruction* push = new (arena_) HPushArgument(arg, i);
+        current_block_->AddInstruction(push);
+        invoke->SetArgumentAt(i, push);
+      }
+      current_block_->AddInstruction(invoke);
       break;
     }
 
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b86665b..8f07d89 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -30,6 +30,7 @@
 namespace art {
 
 void CodeGenerator::Compile(CodeAllocator* allocator) {
+  frame_size_ = GetGraph()->GetMaximumNumberOfOutVRegs() * kWordSize;
   const GrowableArray<HBasicBlock*>* blocks = GetGraph()->GetBlocks();
   DCHECK(blocks->Get(0) == GetGraph()->GetEntryBlock());
   DCHECK(GoesToNextBlock(GetGraph()->GetEntryBlock(), blocks->Get(1)));
@@ -73,9 +74,6 @@
     current->Accept(location_builder);
     InitLocations(current);
     current->Accept(instruction_visitor);
-    if (current->GetLocations() != nullptr && current->GetLocations()->Out().IsValid()) {
-      Push(current, current->GetLocations()->Out());
-    }
   }
 }
 
@@ -85,7 +83,7 @@
     Location location = instruction->GetLocations()->InAt(i);
     if (location.IsValid()) {
       // Move the input to the desired location.
-      Move(instruction->InputAt(i), location);
+      Move(instruction->InputAt(i), location, instruction);
     }
   }
 }
@@ -204,11 +202,10 @@
 
 void CodeGenerator::BuildVMapTable(std::vector<uint8_t>* data) const {
   Leb128EncodingVector vmap_encoder;
-  size_t size = 1 + 1 /* marker */ + 0;
+  // We currently don't use callee-saved registers.
+  size_t size = 0 + 1 /* marker */ + 0;
   vmap_encoder.Reserve(size + 1u);  // All values are likely to be one byte in ULEB128 (<128).
   vmap_encoder.PushBackUnsigned(size);
-  // We're currently always saving the frame pointer, so set it in the table as a temporary.
-  vmap_encoder.PushBackUnsigned(kVRegTempBaseReg + VmapTable::kEntryAdjustment);
   vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
 
   *data = vmap_encoder.GetData();
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 24dcab6..01bbcc0 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -141,8 +141,7 @@
   virtual void GenerateFrameEntry() = 0;
   virtual void GenerateFrameExit() = 0;
   virtual void Bind(Label* label) = 0;
-  virtual void Move(HInstruction* instruction, Location location) = 0;
-  virtual void Push(HInstruction* instruction, Location location) = 0;
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) = 0;
   virtual HGraphVisitor* GetLocationBuilder() = 0;
   virtual HGraphVisitor* GetInstructionVisitor() = 0;
   virtual Assembler* GetAssembler() = 0;
@@ -191,6 +190,33 @@
   DISALLOW_COPY_AND_ASSIGN(CodeGenerator);
 };
 
+template <typename T>
+class CallingConvention {
+ public:
+  CallingConvention(const T* registers, int number_of_registers)
+      : registers_(registers), number_of_registers_(number_of_registers) {}
+
+  size_t GetNumberOfRegisters() const { return number_of_registers_; }
+
+  T GetRegisterAt(size_t index) const {
+    DCHECK_LT(index, number_of_registers_);
+    return registers_[index];
+  }
+
+  uint8_t GetStackOffsetOf(size_t index) const {
+    DCHECK_GE(index, number_of_registers_);
+    // We still reserve the space for parameters passed by registers.
+    // Add kWordSize for the method pointer.
+    return index * kWordSize + kWordSize;
+  }
+
+ private:
+  const T* registers_;
+  const size_t number_of_registers_;
+
+  DISALLOW_COPY_AND_ASSIGN(CallingConvention);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_H_
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 68c997b..09d6f7b 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -26,40 +26,50 @@
 namespace art {
 namespace arm {
 
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
+InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),
+        codegen_(codegen) {}
+
 void CodeGeneratorARM::GenerateFrameEntry() {
   core_spill_mask_ |= (1 << LR);
-  // We're currently always using FP, which is callee-saved in Quick.
-  core_spill_mask_ |= (1 << FP);
+  __ PushList((1 << LR));
 
-  __ PushList((1 << FP) | (1 << LR));
-  __ mov(FP, ShifterOperand(SP));
-
-  // Add the current ART method to the frame size, the return pc, and FP.
-  SetFrameSize(RoundUp(GetFrameSize() + 3 * kWordSize, kStackAlignment));
-  // PC and FP have already been pushed on the stack.
-  __ AddConstant(SP, -(GetFrameSize() - 2 * kWordSize));
+  // Add the current ART method to the frame size and the return PC.
+  SetFrameSize(RoundUp(GetFrameSize() + 2 * kWordSize, kStackAlignment));
+  // The return PC has already been pushed on the stack.
+  __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize));
   __ str(R0, Address(SP, 0));
 }
 
 void CodeGeneratorARM::GenerateFrameExit() {
-  __ mov(SP, ShifterOperand(FP));
-  __ PopList((1 << FP) | (1 << PC));
+  __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize);
+  __ PopList((1 << PC));
 }
 
 void CodeGeneratorARM::Bind(Label* label) {
   __ Bind(label);
 }
 
-void CodeGeneratorARM::Push(HInstruction* instruction, Location location) {
-  __ Push(location.reg<Register>());
+int32_t CodeGeneratorARM::GetStackSlot(HLocal* local) const {
+  return (GetGraph()->GetMaximumNumberOfOutVRegs() + local->GetRegNumber()) * kWordSize;
 }
 
-void CodeGeneratorARM::Move(HInstruction* instruction, Location location) {
-  HIntConstant* constant = instruction->AsIntConstant();
-  if (constant != nullptr) {
-    __ LoadImmediate(location.reg<Register>(), constant->GetValue());
+void CodeGeneratorARM::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+  if (instruction->AsIntConstant() != nullptr) {
+    __ LoadImmediate(location.reg<Register>(), instruction->AsIntConstant()->GetValue());
+  } else if (instruction->AsLoadLocal() != nullptr) {
+    __ LoadFromOffset(kLoadWord, location.reg<Register>(),
+                      SP, GetStackSlot(instruction->AsLoadLocal()->GetLocal()));
   } else {
-    __ Pop(location.reg<Register>());
+    // This can currently only happen when the instruction that requests the move
+    // is the next to be compiled.
+    DCHECK_EQ(instruction->GetNext(), move_for);
+    __ mov(location.reg<Register>(),
+           ShifterOperand(instruction->GetLocations()->Out().reg<Register>()));
   }
 }
 
@@ -128,20 +138,11 @@
 }
 
 void LocationsBuilderARM::VisitLoadLocal(HLoadLocal* load) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load);
-  locations->SetOut(Location(R0));
-  load->SetLocations(locations);
-}
-
-static int32_t GetStackSlot(HLocal* local) {
-  // We are currently using FP to access locals, so the offset must be negative.
-  return (local->GetRegNumber() + 1) * -kWordSize;
+  load->SetLocations(nullptr);
 }
 
 void InstructionCodeGeneratorARM::VisitLoadLocal(HLoadLocal* load) {
-  LocationSummary* locations = load->GetLocations();
-  __ LoadFromOffset(kLoadWord, locations->Out().reg<Register>(),
-                    FP, GetStackSlot(load->GetLocal()));
+  // Nothing to do, this is driven by the code generator.
 }
 
 void LocationsBuilderARM::VisitStoreLocal(HStoreLocal* store) {
@@ -153,7 +154,7 @@
 void InstructionCodeGeneratorARM::VisitStoreLocal(HStoreLocal* store) {
   LocationSummary* locations = store->GetLocations();
   __ StoreToOffset(kStoreWord, locations->InAt(1).reg<Register>(),
-                   FP, GetStackSlot(store->GetLocal()));
+                   SP, codegen_->GetStackSlot(store->GetLocal()));
 }
 
 void LocationsBuilderARM::VisitIntConstant(HIntConstant* constant) {
@@ -183,15 +184,52 @@
   codegen_->GenerateFrameExit();
 }
 
+static constexpr Register kParameterCoreRegisters[] = { R1, R2, R3 };
+static constexpr int kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeStaticCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeStaticCallingConvention()
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeStaticCallingConvention);
+};
+
+void LocationsBuilderARM::VisitPushArgument(HPushArgument* argument) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(argument);
+  InvokeStaticCallingConvention calling_convention;
+  if (argument->GetArgumentIndex() < calling_convention.GetNumberOfRegisters()) {
+    Location location = Location(calling_convention.GetRegisterAt(argument->GetArgumentIndex()));
+    locations->SetInAt(0, location);
+    locations->SetOut(location);
+  } else {
+    locations->SetInAt(0, Location(R0));
+  }
+  argument->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorARM::VisitPushArgument(HPushArgument* argument) {
+  uint8_t argument_index = argument->GetArgumentIndex();
+  InvokeStaticCallingConvention calling_convention;
+  size_t parameter_registers = calling_convention.GetNumberOfRegisters();
+  LocationSummary* locations = argument->GetLocations();
+  if (argument_index >= parameter_registers) {
+    uint8_t offset = calling_convention.GetStackOffsetOf(argument_index);
+    __ StoreToOffset(kStoreWord, locations->InAt(0).reg<Register>(), SP, offset);
+  } else {
+    DCHECK_EQ(locations->Out().reg<Register>(), locations->InAt(0).reg<Register>());
+  }
+}
+
 void LocationsBuilderARM::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  CHECK_EQ(invoke->InputCount(), 0);
   locations->AddTemp(Location(R0));
   invoke->SetLocations(locations);
 }
 
 void InstructionCodeGeneratorARM::LoadCurrentMethod(Register reg) {
-  __ ldr(reg, Address(SP, 0));
+  __ ldr(reg, Address(SP, kCurrentMethodStackOffset));
 }
 
 void InstructionCodeGeneratorARM::VisitInvokeStatic(HInvokeStatic* invoke) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 7a2835d..52d6b2e 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -43,12 +43,11 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderARM);
 };
 
+class CodeGeneratorARM;
+
 class InstructionCodeGeneratorARM : public HGraphVisitor {
  public:
-  explicit InstructionCodeGeneratorARM(HGraph* graph, CodeGenerator* codegen)
-      : HGraphVisitor(graph),
-        assembler_(codegen->GetAssembler()),
-        codegen_(codegen) { }
+  InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen);
 
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
@@ -57,12 +56,12 @@
 
 #undef DECLARE_VISIT_INSTRUCTION
 
-  Assembler* GetAssembler() const { return assembler_; }
+  ArmAssembler* GetAssembler() const { return assembler_; }
   void LoadCurrentMethod(Register reg);
 
  private:
-  Assembler* const assembler_;
-  CodeGenerator* const codegen_;
+  ArmAssembler* const assembler_;
+  CodeGeneratorARM* const codegen_;
 
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorARM);
 };
@@ -75,12 +74,10 @@
         instruction_visitor_(graph, this) { }
   virtual ~CodeGeneratorARM() { }
 
- protected:
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
-  virtual void Move(HInstruction* instruction, Location location) OVERRIDE;
-  virtual void Push(HInstruction* instruction, Location location) OVERRIDE;
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
 
   virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
@@ -90,16 +87,17 @@
     return &instruction_visitor_;
   }
 
-  virtual Assembler* GetAssembler() OVERRIDE {
+  virtual ArmAssembler* GetAssembler() OVERRIDE {
     return &assembler_;
   }
 
+  int32_t GetStackSlot(HLocal* local) const;
+
  private:
   LocationsBuilderARM location_builder_;
   InstructionCodeGeneratorARM instruction_visitor_;
   ArmAssembler assembler_;
 
-
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
 };
 
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 1764486..7b0a087 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,45 +26,54 @@
 namespace art {
 namespace x86 {
 
+static constexpr int kNumberOfPushedRegistersAtEntry = 1;
+static constexpr int kCurrentMethodStackOffset = 0;
+
+InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
+      : HGraphVisitor(graph),
+        assembler_(codegen->GetAssembler()),
+        codegen_(codegen) {}
+
 void CodeGeneratorX86::GenerateFrameEntry() {
   // Create a fake register to mimic Quick.
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
-  // We're currently always using EBP, which is callee-saved in Quick.
-  core_spill_mask_ |= (1 << EBP);
 
-  __ pushl(EBP);
-  __ movl(EBP, ESP);
-  // Add the current ART method to the frame size, the return pc, and EBP.
-  SetFrameSize(RoundUp(GetFrameSize() + 3 * kWordSize, kStackAlignment));
-  // The PC and EBP have already been pushed on the stack.
-  __ subl(ESP, Immediate(GetFrameSize() - 2 * kWordSize));
+  // Add the current ART method to the frame size and the return PC.
+  SetFrameSize(RoundUp(GetFrameSize() + 2 * kWordSize, kStackAlignment));
+  // The return PC has already been pushed on the stack.
+  __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize));
   __ movl(Address(ESP, 0), EAX);
 }
 
 void CodeGeneratorX86::GenerateFrameExit() {
-  __ movl(ESP, EBP);
-  __ popl(EBP);
+  __ addl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kWordSize));
 }
 
 void CodeGeneratorX86::Bind(Label* label) {
   __ Bind(label);
 }
 
-void CodeGeneratorX86::Push(HInstruction* instruction, Location location) {
-  __ pushl(location.reg<Register>());
-}
-
 void InstructionCodeGeneratorX86::LoadCurrentMethod(Register reg) {
-  __ movl(reg, Address(ESP, 0));
+  __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
 }
 
-void CodeGeneratorX86::Move(HInstruction* instruction, Location location) {
-  HIntConstant* constant = instruction->AsIntConstant();
-  if (constant != nullptr) {
-    __ movl(location.reg<Register>(), Immediate(constant->GetValue()));
+int32_t CodeGeneratorX86::GetStackSlot(HLocal* local) const {
+  return (GetGraph()->GetMaximumNumberOfOutVRegs() + local->GetRegNumber()) * kWordSize;
+}
+
+void CodeGeneratorX86::Move(HInstruction* instruction, Location location, HInstruction* move_for) {
+  if (instruction->AsIntConstant() != nullptr) {
+    __ movl(location.reg<Register>(), Immediate(instruction->AsIntConstant()->GetValue()));
+  } else if (instruction->AsLoadLocal() != nullptr) {
+    __ movl(location.reg<Register>(),
+            Address(ESP, GetStackSlot(instruction->AsLoadLocal()->GetLocal())));
   } else {
-    __ popl(location.reg<Register>());
+    // This can currently only happen when the instruction that requests the move
+    // is the next to be compiled.
+    DCHECK_EQ(instruction->GetNext(), move_for);
+    __ movl(location.reg<Register>(),
+            instruction->GetLocations()->Out().reg<Register>());
   }
 }
 
@@ -117,20 +126,11 @@
 }
 
 void LocationsBuilderX86::VisitLoadLocal(HLoadLocal* local) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(local);
-  locations->SetOut(Location(EAX));
-  local->SetLocations(locations);
-}
-
-static int32_t GetStackSlot(HLocal* local) {
-  // We are currently using EBP to access locals, so the offset must be negative.
-  // +1 for going backwards, +1 for the method pointer.
-  return (local->GetRegNumber() + 2) * -kWordSize;
+  local->SetLocations(nullptr);
 }
 
 void InstructionCodeGeneratorX86::VisitLoadLocal(HLoadLocal* load) {
-  __ movl(load->GetLocations()->Out().reg<Register>(),
-          Address(EBP, GetStackSlot(load->GetLocal())));
+  // Nothing to do, this is driven by the code generator.
 }
 
 void LocationsBuilderX86::VisitStoreLocal(HStoreLocal* local) {
@@ -140,7 +140,7 @@
 }
 
 void InstructionCodeGeneratorX86::VisitStoreLocal(HStoreLocal* store) {
-  __ movl(Address(EBP, GetStackSlot(store->GetLocal())),
+  __ movl(Address(ESP, codegen_->GetStackSlot(store->GetLocal())),
           store->GetLocations()->InAt(1).reg<Register>());
 }
 
@@ -187,9 +187,48 @@
   __ ret();
 }
 
+static constexpr Register kParameterCoreRegisters[] = { ECX, EDX, EBX };
+static constexpr int kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
+
+class InvokeStaticCallingConvention : public CallingConvention<Register> {
+ public:
+  InvokeStaticCallingConvention()
+      : CallingConvention(kParameterCoreRegisters, kParameterCoreRegistersLength) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeStaticCallingConvention);
+};
+
+void LocationsBuilderX86::VisitPushArgument(HPushArgument* argument) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(argument);
+  InvokeStaticCallingConvention calling_convention;
+  if (argument->GetArgumentIndex() < calling_convention.GetNumberOfRegisters()) {
+    Location location = Location(calling_convention.GetRegisterAt(argument->GetArgumentIndex()));
+    locations->SetInAt(0, location);
+    locations->SetOut(location);
+  } else {
+    locations->SetInAt(0, Location(EAX));
+  }
+  argument->SetLocations(locations);
+}
+
+void InstructionCodeGeneratorX86::VisitPushArgument(HPushArgument* argument) {
+  uint8_t argument_index = argument->GetArgumentIndex();
+  InvokeStaticCallingConvention calling_convention;
+  size_t parameter_registers = calling_convention.GetNumberOfRegisters();
+  if (argument_index >= parameter_registers) {
+    uint8_t offset = calling_convention.GetStackOffsetOf(argument_index);
+    __ movl(Address(ESP, offset),
+            argument->GetLocations()->InAt(0).reg<Register>());
+
+  } else {
+    DCHECK_EQ(argument->GetLocations()->Out().reg<Register>(),
+              argument->GetLocations()->InAt(0).reg<Register>());
+  }
+}
+
 void LocationsBuilderX86::VisitInvokeStatic(HInvokeStatic* invoke) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(invoke);
-  CHECK_EQ(invoke->InputCount(), 0);
   locations->AddTemp(Location(EAX));
   invoke->SetLocations(locations);
 }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 505237b..dd5044f 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -23,9 +23,6 @@
 
 namespace art {
 
-class Assembler;
-class Label;
-
 namespace x86 {
 
 class LocationsBuilderX86 : public HGraphVisitor {
@@ -43,12 +40,11 @@
   DISALLOW_COPY_AND_ASSIGN(LocationsBuilderX86);
 };
 
+class CodeGeneratorX86;
+
 class InstructionCodeGeneratorX86 : public HGraphVisitor {
  public:
-  explicit InstructionCodeGeneratorX86(HGraph* graph, CodeGenerator* codegen)
-      : HGraphVisitor(graph),
-        assembler_(codegen->GetAssembler()),
-        codegen_(codegen) { }
+  InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
 
 #define DECLARE_VISIT_INSTRUCTION(name)     \
   virtual void Visit##name(H##name* instr);
@@ -59,11 +55,11 @@
 
   void LoadCurrentMethod(Register reg);
 
-  Assembler* GetAssembler() const { return assembler_; }
+  X86Assembler* GetAssembler() const { return assembler_; }
 
  private:
-  Assembler* const assembler_;
-  CodeGenerator* const codegen_;
+  X86Assembler* const assembler_;
+  CodeGeneratorX86* const codegen_;
 
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86);
 };
@@ -76,12 +72,10 @@
         instruction_visitor_(graph, this) { }
   virtual ~CodeGeneratorX86() { }
 
- protected:
   virtual void GenerateFrameEntry() OVERRIDE;
   virtual void GenerateFrameExit() OVERRIDE;
   virtual void Bind(Label* label) OVERRIDE;
-  virtual void Move(HInstruction* instruction, Location location) OVERRIDE;
-  virtual void Push(HInstruction* instruction, Location location) OVERRIDE;
+  virtual void Move(HInstruction* instruction, Location location, HInstruction* move_for) OVERRIDE;
 
   virtual HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
@@ -95,6 +89,8 @@
     return &assembler_;
   }
 
+  int32_t GetStackSlot(HLocal* local) const;
+
  private:
   LocationsBuilderX86 location_builder_;
   InstructionCodeGeneratorX86 instruction_visitor_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fc67486..2b21905 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -41,6 +41,7 @@
       : arena_(arena),
         blocks_(arena, kDefaultNumberOfBlocks),
         dominator_order_(arena, kDefaultNumberOfBlocks),
+        maximum_number_of_out_vregs_(0),
         current_instruction_id_(0) { }
 
   ArenaAllocator* GetArena() const { return arena_; }
@@ -59,6 +60,14 @@
     return current_instruction_id_++;
   }
 
+  uint16_t GetMaximumNumberOfOutVRegs() const {
+    return maximum_number_of_out_vregs_;
+  }
+
+  void UpdateMaximumNumberOfOutVRegs(uint16_t new_value) {
+    maximum_number_of_out_vregs_ = std::max(new_value, maximum_number_of_out_vregs_);
+  }
+
  private:
   HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
   void VisitBlockForDominatorTree(HBasicBlock* block,
@@ -81,6 +90,9 @@
   HBasicBlock* entry_block_;
   HBasicBlock* exit_block_;
 
+  // The maximum number of arguments passed to an HInvoke in this graph.
+  uint16_t maximum_number_of_out_vregs_;
+
   // The current id to assign to a newly added instruction. See HInstruction.id_.
   int current_instruction_id_;
 
@@ -189,6 +201,7 @@
   M(InvokeStatic)                                          \
   M(LoadLocal)                                             \
   M(Local)                                                 \
+  M(PushArgument)                                          \
   M(Return)                                                \
   M(ReturnVoid)                                            \
   M(StoreLocal)                                            \
@@ -589,6 +602,10 @@
   virtual intptr_t InputCount() const { return inputs_.Size(); }
   virtual HInstruction* InputAt(intptr_t i) const { return inputs_.Get(i); }
 
+  void SetArgumentAt(size_t index, HInstruction* argument) {
+    inputs_.Put(index, argument);
+  }
+
   int32_t GetDexPc() const { return dex_pc_; }
 
  protected:
@@ -601,19 +618,40 @@
 
 class HInvokeStatic : public HInvoke {
  public:
-  HInvokeStatic(ArenaAllocator* arena, uint32_t number_of_arguments, int32_t dex_pc, int32_t index_in_dex_cache)
-      : HInvoke(arena, number_of_arguments, dex_pc), index_in_dex_cache_(index_in_dex_cache) { }
+  HInvokeStatic(ArenaAllocator* arena,
+                uint32_t number_of_arguments,
+                int32_t dex_pc,
+                int32_t index_in_dex_cache)
+      : HInvoke(arena, number_of_arguments, dex_pc), index_in_dex_cache_(index_in_dex_cache) {}
 
   uint32_t GetIndexInDexCache() const { return index_in_dex_cache_; }
 
   DECLARE_INSTRUCTION(InvokeStatic)
 
  private:
-  uint32_t index_in_dex_cache_;
+  const uint32_t index_in_dex_cache_;
 
   DISALLOW_COPY_AND_ASSIGN(HInvokeStatic);
 };
 
+// HPushArgument nodes are inserted after the evaluation of an argument
+// of a call. Their sole purpose is to ease the code generator's work.
+class HPushArgument : public HTemplateInstruction<1> {
+ public:
+  HPushArgument(HInstruction* argument, uint8_t argument_index) : argument_index_(argument_index) {
+    SetRawInputAt(0, argument);
+  }
+
+  uint8_t GetArgumentIndex() const { return argument_index_; }
+
+  DECLARE_INSTRUCTION(PushArgument)
+
+ private:
+  const uint8_t argument_index_;
+
+  DISALLOW_COPY_AND_ASSIGN(HPushArgument);
+};
+
 class HAdd : public HBinaryOperation {
  public:
   HAdd(Primitive::Type result_type, HInstruction* left, HInstruction* right)
diff --git a/test/401-optimizing-compiler/expected.txt b/test/401-optimizing-compiler/expected.txt
index 7b3a018..268da55 100644
--- a/test/401-optimizing-compiler/expected.txt
+++ b/test/401-optimizing-compiler/expected.txt
@@ -1,3 +1,6 @@
 In static method
+In static method with 2 args 1 2
+In static method with 5 args 1 2 3 4 5
+In static method with 7 args 1 2 3 4 5 6 7
 Forced GC
 java.lang.Error: Error
diff --git a/test/401-optimizing-compiler/src/Main.java b/test/401-optimizing-compiler/src/Main.java
index 2609e0f..4031ff1 100644
--- a/test/401-optimizing-compiler/src/Main.java
+++ b/test/401-optimizing-compiler/src/Main.java
@@ -30,6 +30,9 @@
 
   public static void $opt$TestInvokeStatic() {
     printStaticMethod();
+    printStaticMethodWith2Args(1, 2);
+    printStaticMethodWith5Args(1, 2, 3, 4, 5);
+    printStaticMethodWith7Args(1, 2, 3, 4, 5, 6, 7);
     forceGCStaticMethod();
     throwStaticMethod();
   }
@@ -38,6 +41,20 @@
     System.out.println("In static method");
   }
 
+  public static void printStaticMethodWith2Args(int a, int b) {
+    System.out.println("In static method with 2 args " + a + " " + b);
+  }
+
+  public static void printStaticMethodWith5Args(int a, int b, int c, int d, int e) {
+    System.out.println("In static method with 5 args "
+        + a + " " + b + " " + c + " " + d + " " + e);
+  }
+
+  public static void printStaticMethodWith7Args(int a, int b, int c, int d, int e, int f, int g) {
+    System.out.println("In static method with 7 args "
+        + a + " " + b + " " + c + " " + d + " " + e + " " + f + " " + g);
+  }
+
   public static void forceGCStaticMethod() {
     Runtime.getRuntime().gc();
     Runtime.getRuntime().gc();