Revert "ART: Reduce the instructions generated by packed switch."
This reverts commit 59f054d98f519a3efa992b1c688eb97bdd8bbf55.
bug:26121945
Change-Id: I8a5ad7ef1f1de8d44787c27528fa3f7f5c2e9cd3
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index ca71c32..c3979f3 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -90,9 +90,8 @@
static constexpr const char* kBuilderPassName = "builder";
- // The number of entries in a packed switch before we use a jump table or specified
- // compare/jump series.
- static constexpr uint16_t kSmallSwitchThreshold = 3;
+ // The number of entries in a packed switch before we use a jump table.
+ static constexpr uint16_t kSmallSwitchThreshold = 5;
private:
// Analyzes the dex instruction and adds HInstruction to the graph
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0a26786..ac6b5e8 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -59,7 +59,7 @@
// S registers. Therefore there is no need to block it.
static constexpr DRegister DTMP = D31;
-static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
#define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmWordSize, x).Int32Value()
@@ -6106,7 +6106,7 @@
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
+ if (switch_instr->GetNumEntries() >= kPackedSwitchJumpTableThreshold &&
codegen_->GetAssembler()->IsThumb()) {
locations->AddTemp(Location::RequiresRegister()); // We need a temp for the table base.
if (switch_instr->GetStartValue() != 0) {
@@ -6122,30 +6122,12 @@
Register value_reg = locations->InAt(0).AsRegister<Register>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- if (num_entries <= kPackedSwitchCompareJumpThreshold || !codegen_->GetAssembler()->IsThumb()) {
+ if (num_entries < kPackedSwitchJumpTableThreshold || !codegen_->GetAssembler()->IsThumb()) {
// Create a series of compare/jumps.
- Register temp_reg = IP;
- // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store
- // the immediate, because IP is used as the destination register. For the other
- // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
- // and they can be encoded in the instruction without making use of IP register.
- __ AddConstantSetFlags(temp_reg, value_reg, -lower_bound);
-
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- // Jump to successors[0] if value == lower_bound.
- __ b(codegen_->GetLabelOf(successors[0]), EQ);
- int32_t last_index = 0;
- for (; num_entries - last_index > 2; last_index += 2) {
- __ AddConstantSetFlags(temp_reg, temp_reg, -2);
- // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
- __ b(codegen_->GetLabelOf(successors[last_index + 1]), LO);
- // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
- __ b(codegen_->GetLabelOf(successors[last_index + 2]), EQ);
- }
- if (num_entries - last_index == 2) {
- // The last missing case_value.
- GenerateCompareWithImmediate(temp_reg, 1);
- __ b(codegen_->GetLabelOf(successors[last_index + 1]), EQ);
+ for (uint32_t i = 0; i < num_entries; i++) {
+ GenerateCompareWithImmediate(value_reg, lower_bound + i);
+ __ b(codegen_->GetLabelOf(successors[i]), EQ);
}
// And the default for any other value.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 227f4be..04acd9d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -71,10 +71,10 @@
using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr int kCurrentMethodStackOffset = 0;
-// The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
+// The compare/jump sequence will generate about (2 * num_entries + 1) instructions. While jump
// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
// generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
@@ -546,7 +546,7 @@
void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
uint32_t num_entries = switch_instr_->GetNumEntries();
- DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
+ DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
// We are about to use the assembler to place literals directly. Make sure we have enough
// underlying code buffer and we have generated the jump table with right size.
@@ -4558,29 +4558,20 @@
// ranges and emit the tables only as required.
static constexpr int32_t kJumpTableInstructionThreshold = 1* MB / kMaxExpectedSizePerHInstruction;
- if (num_entries <= kPackedSwitchCompareJumpThreshold ||
+ if (num_entries < kPackedSwitchJumpTableThreshold ||
// Current instruction id is an upper bound of the number of HIRs in the graph.
GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
// Create a series of compare/jumps.
- UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
- Register temp = temps.AcquireW();
- __ Subs(temp, value_reg, Operand(lower_bound));
-
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- // Jump to successors[0] if value == lower_bound.
- __ B(eq, codegen_->GetLabelOf(successors[0]));
- int32_t last_index = 0;
- for (; num_entries - last_index > 2; last_index += 2) {
- __ Subs(temp, temp, Operand(2));
- // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
- __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
- // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
- __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
- }
- if (num_entries - last_index == 2) {
- // The last missing case_value.
- __ Cmp(temp, Operand(1));
- __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
+ for (uint32_t i = 0; i < num_entries; i++) {
+ int32_t case_value = lower_bound + i;
+ vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
+ if (case_value == 0) {
+ __ Cbz(value_reg, succ);
+ } else {
+ __ Cmp(value_reg, Operand(case_value));
+ __ B(eq, succ);
+ }
}
// And the default for any other value.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index f872bfe..9dc9167 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -4248,31 +4248,19 @@
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
// Create a set of compare/jumps.
- Register temp_reg = TMP;
- __ Addiu32(temp_reg, value_reg, -lower_bound);
- // Jump to default if index is negative
- // Note: We don't check the case that index is positive while value < lower_bound, because in
- // this case, index >= num_entries must be true. So that we can save one branch instruction.
- __ Bltz(temp_reg, codegen_->GetLabelOf(default_block));
-
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- // Jump to successors[0] if value == lower_bound.
- __ Beqz(temp_reg, codegen_->GetLabelOf(successors[0]));
- int32_t last_index = 0;
- for (; num_entries - last_index > 2; last_index += 2) {
- __ Addiu(temp_reg, temp_reg, -2);
- // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
- __ Bltz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
- // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
- __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
- }
- if (num_entries - last_index == 2) {
- // The last missing case_value.
- __ Addiu(temp_reg, temp_reg, -1);
- __ Beqz(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ for (int32_t i = 0; i < num_entries; ++i) {
+ int32_t case_value = lower_bound + i;
+ MipsLabel* successor_label = codegen_->GetLabelOf(successors[i]);
+ if (case_value == 0) {
+ __ Beqz(value_reg, successor_label);
+ } else {
+ __ LoadConst32(TMP, case_value);
+ __ Beq(value_reg, TMP, successor_label);
+ }
}
- // And the default for any other value.
+ // Insert the default branch for every other value.
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
__ B(codegen_->GetLabelOf(default_block));
}
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 78f5644..bc5eb31 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -3975,34 +3975,17 @@
GpuRegister value_reg = locations->InAt(0).AsRegister<GpuRegister>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- // Create a set of compare/jumps.
- GpuRegister temp_reg = TMP;
- if (IsInt<16>(-lower_bound)) {
- __ Addiu(temp_reg, value_reg, -lower_bound);
- } else {
- __ LoadConst32(AT, -lower_bound);
- __ Addu(temp_reg, value_reg, AT);
- }
- // Jump to default if index is negative
- // Note: We don't check the case that index is positive while value < lower_bound, because in
- // this case, index >= num_entries must be true. So that we can save one branch instruction.
- __ Bltzc(temp_reg, codegen_->GetLabelOf(default_block));
-
+ // Create a series of compare/jumps.
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- // Jump to successors[0] if value == lower_bound.
- __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[0]));
- int32_t last_index = 0;
- for (; num_entries - last_index > 2; last_index += 2) {
- __ Addiu(temp_reg, temp_reg, -2);
- // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
- __ Bltzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
- // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
- __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 2]));
- }
- if (num_entries - last_index == 2) {
- // The last missing case_value.
- __ Addiu(temp_reg, temp_reg, -1);
- __ Beqzc(temp_reg, codegen_->GetLabelOf(successors[last_index + 1]));
+ for (int32_t i = 0; i < num_entries; i++) {
+ int32_t case_value = lower_bound + i;
+ Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
+ if (case_value == 0) {
+ __ Beqzc(value_reg, succ);
+ } else {
+ __ LoadConst32(TMP, case_value);
+ __ Beqc(value_reg, TMP, succ);
+ }
}
// And the default for any other value.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 19f03df..2fb87d3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,6 +42,7 @@
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = EAX;
+
static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
static constexpr int kC2ConditionMask = 0x400;
@@ -6425,67 +6426,31 @@
locations->SetInAt(0, Location::RequiresRegister());
}
-void InstructionCodeGeneratorX86::GenPackedSwitchWithCompares(Register value_reg,
- int32_t lower_bound,
- uint32_t num_entries,
- HBasicBlock* switch_block,
- HBasicBlock* default_block) {
- // Figure out the correct compare values and jump conditions.
- // Handle the first compare/branch as a special case because it might
- // jump to the default case.
- DCHECK_GT(num_entries, 2u);
- Condition first_condition;
- uint32_t index;
- const ArenaVector<HBasicBlock*>& successors = switch_block->GetSuccessors();
- if (lower_bound != 0) {
- first_condition = kLess;
- __ cmpl(value_reg, Immediate(lower_bound));
- __ j(first_condition, codegen_->GetLabelOf(default_block));
- __ j(kEqual, codegen_->GetLabelOf(successors[0]));
+void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+ int32_t lower_bound = switch_instr->GetStartValue();
+ int32_t num_entries = switch_instr->GetNumEntries();
+ LocationSummary* locations = switch_instr->GetLocations();
+ Register value_reg = locations->InAt(0).AsRegister<Register>();
+ HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- index = 1;
- } else {
- // Handle all the compare/jumps below.
- first_condition = kBelow;
- index = 0;
- }
-
- // Handle the rest of the compare/jumps.
- for (; index + 1 < num_entries; index += 2) {
- int32_t compare_to_value = lower_bound + index + 1;
- __ cmpl(value_reg, Immediate(compare_to_value));
- // Jump to successors[index] if value < case_value[index].
- __ j(first_condition, codegen_->GetLabelOf(successors[index]));
- // Jump to successors[index + 1] if value == case_value[index + 1].
- __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
- }
-
- if (index != num_entries) {
- // There are an odd number of entries. Handle the last one.
- DCHECK_EQ(index + 1, num_entries);
- __ cmpl(value_reg, Immediate(lower_bound + index));
- __ j(kEqual, codegen_->GetLabelOf(successors[index]));
+ // Create a series of compare/jumps.
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+ for (int i = 0; i < num_entries; i++) {
+ int32_t case_value = lower_bound + i;
+ if (case_value == 0) {
+ __ testl(value_reg, value_reg);
+ } else {
+ __ cmpl(value_reg, Immediate(case_value));
+ }
+ __ j(kEqual, codegen_->GetLabelOf(successors[i]));
}
// And the default for any other value.
- if (!codegen_->GoesToNextBlock(switch_block, default_block)) {
- __ jmp(codegen_->GetLabelOf(default_block));
+ if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+ __ jmp(codegen_->GetLabelOf(default_block));
}
}
-void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) {
- int32_t lower_bound = switch_instr->GetStartValue();
- uint32_t num_entries = switch_instr->GetNumEntries();
- LocationSummary* locations = switch_instr->GetLocations();
- Register value_reg = locations->InAt(0).AsRegister<Register>();
-
- GenPackedSwitchWithCompares(value_reg,
- lower_bound,
- num_entries,
- switch_instr->GetBlock(),
- switch_instr->GetDefaultBlock());
-}
-
void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
@@ -6500,20 +6465,11 @@
void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- uint32_t num_entries = switch_instr->GetNumEntries();
+ int32_t num_entries = switch_instr->GetNumEntries();
LocationSummary* locations = switch_instr->GetLocations();
Register value_reg = locations->InAt(0).AsRegister<Register>();
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- if (num_entries <= kPackedSwitchJumpTableThreshold) {
- GenPackedSwitchWithCompares(value_reg,
- lower_bound,
- num_entries,
- switch_instr->GetBlock(),
- default_block);
- return;
- }
-
// Optimizing has a jump area.
Register temp_reg = locations->GetTemp(0).AsRegister<Register>();
Register constant_area = locations->InAt(1).AsRegister<Register>();
@@ -6525,7 +6481,7 @@
}
// Is the value in range?
- DCHECK_GE(num_entries, 1u);
+ DCHECK_GE(num_entries, 1);
__ cmpl(value_reg, Immediate(num_entries - 1));
__ j(kAbove, codegen_->GetLabelOf(default_block));
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f9403a6..064051c 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -195,11 +195,6 @@
X86Assembler* GetAssembler() const { return assembler_; }
- // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
- // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
- // generates less code/data with a small num_entries.
- static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
-
private:
// Generate code for the given suspend check. If not null, `successor`
// is the block to branch to if the suspend check is not needed, and after
@@ -241,11 +236,6 @@
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
- void GenPackedSwitchWithCompares(Register value_reg,
- int32_t lower_bound,
- uint32_t num_entries,
- HBasicBlock* switch_block,
- HBasicBlock* default_block);
X86Assembler* const assembler_;
CodeGeneratorX86* const codegen_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 44a51ea..4618be9 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -41,10 +41,6 @@
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
-// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
-// table version generates 7 instructions and num_entries literals. Compare/jump sequence will
-// generates less code/data with a small num_entries.
-static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
@@ -6025,58 +6021,11 @@
void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- uint32_t num_entries = switch_instr->GetNumEntries();
+ int32_t num_entries = switch_instr->GetNumEntries();
LocationSummary* locations = switch_instr->GetLocations();
CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
- HBasicBlock* default_block = switch_instr->GetDefaultBlock();
-
- // Should we generate smaller inline compare/jumps?
- if (num_entries <= kPackedSwitchJumpTableThreshold) {
- // Figure out the correct compare values and jump conditions.
- // Handle the first compare/branch as a special case because it might
- // jump to the default case.
- DCHECK_GT(num_entries, 2u);
- Condition first_condition;
- uint32_t index;
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- if (lower_bound != 0) {
- first_condition = kLess;
- __ cmpl(value_reg_in, Immediate(lower_bound));
- __ j(first_condition, codegen_->GetLabelOf(default_block));
- __ j(kEqual, codegen_->GetLabelOf(successors[0]));
-
- index = 1;
- } else {
- // Handle all the compare/jumps below.
- first_condition = kBelow;
- index = 0;
- }
-
- // Handle the rest of the compare/jumps.
- for (; index + 1 < num_entries; index += 2) {
- int32_t compare_to_value = lower_bound + index + 1;
- __ cmpl(value_reg_in, Immediate(compare_to_value));
- // Jump to successors[index] if value < case_value[index].
- __ j(first_condition, codegen_->GetLabelOf(successors[index]));
- // Jump to successors[index + 1] if value == case_value[index + 1].
- __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
- }
-
- if (index != num_entries) {
- // There are an odd number of entries. Handle the last one.
- DCHECK_EQ(index + 1, num_entries);
- __ cmpl(value_reg_in, Immediate(lower_bound + index));
- __ j(kEqual, codegen_->GetLabelOf(successors[index]));
- }
-
- // And the default for any other value.
- if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
- __ jmp(codegen_->GetLabelOf(default_block));
- }
- return;
- }
// Remove the bias, if needed.
Register value_reg_out = value_reg_in.AsRegister();
@@ -6087,6 +6036,7 @@
CpuRegister value_reg(value_reg_out);
// Is the value in range?
+ HBasicBlock* default_block = switch_instr->GetDefaultBlock();
__ cmpl(value_reg, Immediate(num_entries - 1));
__ j(kAbove, codegen_->GetLabelOf(default_block));
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a385448..b383f1e 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -15,7 +15,6 @@
*/
#include "pc_relative_fixups_x86.h"
-#include "code_generator_x86.h"
namespace art {
namespace x86 {
@@ -80,10 +79,6 @@
}
void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
- if (switch_insn->GetNumEntries() <=
- InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
- return;
- }
// We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
// address the constant area.
InitializePCRelativeBasePointer();