Improved and simplified loop optimizations.
Rationale:
This CL merges some common cases into one, thereby simplifying
the code quite a bit. It also prepares for more general induction
cycles (rather than the simple phi-add currently used). Finally,
it generalizes the closed form elimination with empty loops.
As a result of the latter, elaborate but weird code like:
private static int waterFall() {
int i = 0;
for (; i < 10; i++);
for (; i < 20; i++);
for (; i < 30; i++);
for (; i < 40; i++);
for (; i < 50; i++);
return i;
}
now becomes just this (on x86)!
mov eax, 50
ret
Change-Id: I8d22ce63ce9696918f57bb90f64d9a9303a4791d
Test: m test-art-host
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4acf3ac..93c6c20 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -21,13 +21,15 @@
namespace art {
// TODO: Generalize to cycles, as found by induction analysis?
-static bool IsPhiAddSub(HPhi* phi, /*out*/ HInstruction** addsub_out) {
+static bool IsPhiInduction(HPhi* phi, ArenaSet<HInstruction*>* iset) {
+ DCHECK(iset->empty());
HInputsRef inputs = phi->GetInputs();
if (inputs.size() == 2 && (inputs[1]->IsAdd() || inputs[1]->IsSub())) {
HInstruction* addsub = inputs[1];
if (addsub->InputAt(0) == phi || addsub->InputAt(1) == phi) {
if (addsub->GetUses().HasExactlyOneElement()) {
- *addsub_out = addsub;
+ iset->insert(phi);
+ iset->insert(addsub);
return true;
}
}
@@ -35,39 +37,23 @@
return false;
}
-static bool IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
- HPhi* phi, HInstruction* addsub) {
- for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
- if (use.GetUser() != addsub) {
- HLoopInformation* other_loop_info = use.GetUser()->GetBlock()->GetLoopInformation();
- if (other_loop_info != nullptr && other_loop_info->IsIn(loop_info)) {
- return false;
- }
- }
- }
- return true;
-}
-
// Find: phi: Phi(init, addsub)
// s: SuspendCheck
// c: Condition(phi, bound)
// i: If(c)
// TODO: Find a less pattern matching approach?
-static bool IsEmptyHeader(HBasicBlock* block, /*out*/ HInstruction** addsub) {
+static bool IsEmptyHeader(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
+ DCHECK(iset->empty());
HInstruction* phi = block->GetFirstPhi();
- if (phi != nullptr && phi->GetNext() == nullptr && IsPhiAddSub(phi->AsPhi(), addsub)) {
+ if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi(), iset)) {
HInstruction* s = block->GetFirstInstruction();
if (s != nullptr && s->IsSuspendCheck()) {
HInstruction* c = s->GetNext();
if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
HInstruction* i = c->GetNext();
if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
- // Check that phi is only used inside loop as expected.
- for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
- if (use.GetUser() != *addsub && use.GetUser() != c) {
- return false;
- }
- }
+ iset->insert(c);
+ iset->insert(s);
return true;
}
}
@@ -76,10 +62,11 @@
return false;
}
-static bool IsEmptyBody(HBasicBlock* block, HInstruction* addsub) {
+static bool IsEmptyBody(HBasicBlock* block, ArenaSet<HInstruction*>* iset) {
HInstruction* phi = block->GetFirstPhi();
HInstruction* i = block->GetFirstInstruction();
- return phi == nullptr && i == addsub && i->GetNext() != nullptr && i->GetNext()->IsGoto();
+ return phi == nullptr && iset->find(i) != iset->end() &&
+ i->GetNext() != nullptr && i->GetNext()->IsGoto();
}
static HBasicBlock* TryRemovePreHeader(HBasicBlock* preheader, HBasicBlock* entry_block) {
@@ -127,7 +114,8 @@
induction_range_(induction_analysis),
loop_allocator_(nullptr),
top_loop_(nullptr),
- last_loop_(nullptr) {
+ last_loop_(nullptr),
+ iset_(nullptr) {
}
void HLoopOptimization::Run() {
@@ -164,8 +152,14 @@
}
}
- // Traverse the loop hierarchy inner-to-outer and optimize.
- TraverseLoopsInnerToOuter(top_loop_);
+ // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
+ // a temporary set that stores instructions using the phase-local allocator.
+ if (top_loop_ != nullptr) {
+ ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+ iset_ = &iset;
+ TraverseLoopsInnerToOuter(top_loop_);
+ iset_ = nullptr; // detach
+ }
}
void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
@@ -194,9 +188,25 @@
void HLoopOptimization::RemoveLoop(LoopNode* node) {
DCHECK(node != nullptr);
- // TODO: implement when needed (for current set of optimizations, we don't
- // need to keep recorded loop hierarchy up to date, but as we get different
- // traversal, we may want to remove the node from the hierarchy here.
+ DCHECK(node->inner == nullptr);
+ if (node->previous != nullptr) {
+ // Within sequence.
+ node->previous->next = node->next;
+ if (node->next != nullptr) {
+ node->next->previous = node->previous;
+ }
+ } else {
+ // First of sequence.
+ if (node->outer != nullptr) {
+ node->outer->inner = node->next;
+ } else {
+ top_loop_ = node->next;
+ }
+ if (node->next != nullptr) {
+ node->next->outer = node->outer;
+ node->next->previous = nullptr;
+ }
+ }
}
void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
@@ -213,34 +223,20 @@
void HLoopOptimization::SimplifyInduction(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
- // Scan the phis in the header to find opportunities to optimize induction.
+ // Scan the phis in the header to find opportunities to simplify an induction
+ // cycle that is only used outside the loop. Replace these uses, if any, with
+ // the last value and remove the induction cycle.
+ // Examples: for (int i = 0; x != null; i++) { .... no i .... }
+ // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
- HInstruction* addsub = nullptr;
- // Find phi-add/sub cycle.
- if (IsPhiAddSub(phi, &addsub)) {
- // Simple case, the induction is only used by itself. Although redundant,
- // later phases do not easily detect this property. Thus, eliminate here.
- // Example: for (int i = 0; x != null; i++) { .... no i .... }
- if (phi->GetUses().HasExactlyOneElement()) {
- // Remove the cycle, including all uses. Even environment uses can be removed,
- // since these computations have no effect at all.
- RemoveFromCycle(phi); // removes environment uses too
- RemoveFromCycle(addsub);
- continue;
- }
- // Closed form case. Only the last value of the induction is needed. Remove all
- // overhead from the loop, and replace subsequent uses with the last value.
- // Example: for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
- if (IsOnlyUsedAfterLoop(*node->loop_info, phi, addsub) &&
- induction_range_.CanGenerateLastValue(phi)) {
- HInstruction* last = induction_range_.GenerateLastValue(phi, graph_, preheader);
- // Remove the cycle, replacing all uses. Even environment uses can consume the final
- // value, since any first real use is outside the loop (although this may imply
- // that deopting may look "ahead" a bit on the phi value).
- ReplaceAllUses(phi, last, addsub);
- RemoveFromCycle(phi); // removes environment uses too
- RemoveFromCycle(addsub);
+ iset_->clear();
+ int32_t use_count = 0;
+ if (IsPhiInduction(phi, iset_) &&
+ IsOnlyUsedAfterLoop(*node->loop_info, phi, &use_count) &&
+ TryReplaceWithLastValue(phi, use_count, preheader)) {
+ for (HInstruction* i : *iset_) {
+ RemoveFromCycle(i);
}
}
}
@@ -266,14 +262,18 @@
HBasicBlock* exit = (header->GetSuccessors()[0] == body)
? header->GetSuccessors()[1]
: header->GetSuccessors()[0];
- // Ensure exit can only be reached by exiting loop (this seems typically the
- // case anyway, and simplifies code generation below; TODO: perhaps relax?).
+ // Ensure exit can only be reached by exiting loop.
if (exit->GetPredecessors().size() != 1) {
return;
}
- // Detect an empty loop: no side effects other than plain iteration.
- HInstruction* addsub = nullptr;
- if (IsEmptyHeader(header, &addsub) && IsEmptyBody(body, addsub)) {
+ // Detect an empty loop: no side effects other than plain iteration. Replace
+ // subsequent index uses, if any, with the last value and remove the loop.
+ iset_->clear();
+ int32_t use_count = 0;
+ if (IsEmptyHeader(header, iset_) &&
+ IsEmptyBody(body, iset_) &&
+ IsOnlyUsedAfterLoop(*node->loop_info, header->GetFirstPhi(), &use_count) &&
+ TryReplaceWithLastValue(header->GetFirstPhi(), use_count, preheader)) {
HBasicBlock* entry = TryRemovePreHeader(preheader, graph_->GetEntryBlock());
body->DisconnectAndDelete();
exit->RemovePredecessor(header);
@@ -299,15 +299,29 @@
}
}
-void HLoopOptimization::ReplaceAllUses(HInstruction* instruction,
- HInstruction* replacement,
- HInstruction* exclusion) {
+bool HLoopOptimization::IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
+ HInstruction* instruction,
+ /*out*/ int32_t* use_count) {
+ for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+ HInstruction* user = use.GetUser();
+ if (iset_->find(user) == iset_->end()) { // not excluded?
+ HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
+ if (other_loop_info != nullptr && other_loop_info->IsIn(loop_info)) {
+ return false;
+ }
+ ++*use_count;
+ }
+ }
+ return true;
+}
+
+void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* replacement) {
const HUseList<HInstruction*>& uses = instruction->GetUses();
for (auto it = uses.begin(), end = uses.end(); it != end;) {
HInstruction* user = it->GetUser();
size_t index = it->GetIndex();
++it; // increment before replacing
- if (user != exclusion) {
+ if (iset_->find(user) == iset_->end()) { // not excluded?
user->ReplaceInput(replacement, index);
induction_range_.Replace(user, instruction, replacement); // update induction
}
@@ -317,7 +331,7 @@
HEnvironment* user = it->GetUser();
size_t index = it->GetIndex();
++it; // increment before replacing
- if (user->GetHolder() != exclusion) {
+ if (iset_->find(user->GetHolder()) == iset_->end()) { // not excluded?
user->RemoveAsUserOfInput(index);
user->SetRawEnvAt(index, replacement);
replacement->AddEnvUseAt(user, index);
@@ -325,4 +339,20 @@
}
}
+bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction,
+ int32_t use_count,
+ HBasicBlock* block) {
+ // If true uses appear after the loop, replace these uses with the last value. Environment
+ // uses can consume this value too, since any first true use is outside the loop (although
+ // this may imply that de-opting may look "ahead" a bit on the phi value). If there are only
+ // environment uses, the value is dropped altogether, since the computations have no effect.
+ if (use_count > 0) {
+ if (!induction_range_.CanGenerateLastValue(instruction)) {
+ return false;
+ }
+ ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block));
+ }
+ return true;
+}
+
} // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6092955..b2bf1c8 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -63,9 +63,13 @@
void SimplifyInduction(LoopNode* node);
void RemoveIfEmptyLoop(LoopNode* node);
- void ReplaceAllUses(HInstruction* instruction,
- HInstruction* replacement,
- HInstruction* exclusion);
+ bool IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
+ HInstruction* instruction,
+ /*out*/ int32_t* use_count);
+ void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
+ bool TryReplaceWithLastValue(HInstruction* instruction,
+ int32_t use_count,
+ HBasicBlock* block);
// Range information based on prior induction variable analysis.
InductionVarRange induction_range_;
@@ -79,6 +83,10 @@
LoopNode* top_loop_;
LoopNode* last_loop_;
+ // Temporary bookkeeping of a set of instructions.
+ // Contents reside in phase-local heap memory.
+ ArenaSet<HInstruction*>* iset_;
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index a68c383..0ea85da 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -155,7 +155,7 @@
/// CHECK-DAG: Return [<<Phi1>>] loop:none
//
/// CHECK-START: int Main.closedFormInductionUp() loop_optimization (after)
- /// CHECK-NOT: Phi loop:B\d+ outer_loop:none
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
/// CHECK-DAG: Return loop:none
static int closedFormInductionUp() {
int closed = 12345;
@@ -171,7 +171,7 @@
/// CHECK-DAG: Return [<<Phi2>>] loop:none
//
/// CHECK-START: int Main.closedFormInductionInAndDown(int) loop_optimization (after)
- /// CHECK-NOT: Phi loop:B\d+ outer_loop:none
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
/// CHECK-DAG: Return loop:none
static int closedFormInductionInAndDown(int closed) {
for (int i = 0; i < 10; i++) {
@@ -180,6 +180,17 @@
return closed; // only needs last value
}
+ // TODO: move closed form even further out?
+ static int closedFormNested() {
+ int closed = 0;
+ for (int i = 0; i < 10; i++) {
+ for (int j = 0; j < 10; j++) {
+ closed++;
+ }
+ }
+ return closed; // only needs last-value
+ }
+
// TODO: taken test around closed form?
static int closedFormInductionUpN(int n) {
int closed = 12345;
@@ -198,7 +209,7 @@
}
// TODO: move closed form even further out?
- static int closedFormNested(int n) {
+ static int closedFormNestedN(int n) {
int closed = 0;
for (int i = 0; i < n; i++) {
for (int j = 0; j < 10; j++) {
@@ -208,34 +219,40 @@
return closed; // only needs last-value
}
- // TODO: handle as closed/empty eventually?
- static int mainIndexReturned(int n) {
+ // TODO: move closed form even further out?
+ static int closedFormNestedNN(int n) {
+ int closed = 0;
+ for (int i = 0; i < n; i++) {
+ for (int j = 0; j < n; j++) {
+ closed++;
+ }
+ }
+ return closed; // only needs last-value
+ }
+
+ /// CHECK-START: int Main.mainIndexReturned() loop_optimization (before)
+ /// CHECK-DAG: <<Phi:i\d+>> Phi loop:{{B\d+}} outer_loop:none
+ /// CHECK-DAG: Return [<<Phi>>] loop:none
+ //
+ /// CHECK-START: int Main.mainIndexReturned() loop_optimization (after)
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
+ /// CHECK-DAG: Return loop:none
+ static int mainIndexReturned() {
int i;
- for (i = 0; i < n; i++);
+ for (i = 0; i < 10; i++);
return i;
}
// If ever replaced by closed form, last value should be correct!
- static int periodicReturned(int n) {
+ static int periodicReturned() {
int k = 0;
- for (int i = 0; i < n; i++) {
+ for (int i = 0; i < 9; i++) {
k = 1 - k;
}
return k;
}
- // Same here.
- private static int getSum(int n) {
- int k = 0;
- int sum = 0;
- for (int i = 0; i < n; i++) {
- k++;
- sum += k;
- }
- return sum;
- }
-
- // Same here.
+ // If ever replaced by closed form, last value should be correct!
private static int getSum21() {
int k = 0;
int sum = 0;
@@ -246,7 +263,34 @@
return sum;
}
- // Same here.
+ // TODO: handle as closed/empty eventually?
+ static int mainIndexReturnedN(int n) {
+ int i;
+ for (i = 0; i < n; i++);
+ return i;
+ }
+
+ // If ever replaced by closed form, last value should be correct!
+ static int periodicReturnedN(int n) {
+ int k = 0;
+ for (int i = 0; i < n; i++) {
+ k = 1 - k;
+ }
+ return k;
+ }
+
+ // If ever replaced by closed form, last value should be correct!
+ private static int getSumN(int n) {
+ int k = 0;
+ int sum = 0;
+ for (int i = 0; i < n; i++) {
+ k++;
+ sum += k;
+ }
+ return sum;
+ }
+
+ // If ever replaced by closed form, last value should be correct!
private static int closedTwice() {
int closed = 0;
for (int i = 0; i < 10; i++) {
@@ -269,7 +313,7 @@
/// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
//
/// CHECK-START: int Main.closedFeed() loop_optimization (after)
- /// CHECK-NOT: Phi loop:B\d+ outer_loop:none
+ /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
/// CHECK-DAG: Return loop:none
private static int closedFeed() {
int closed = 0;
@@ -316,6 +360,27 @@
return closed;
}
+ /// CHECK-START: int Main.waterFall() loop_optimization (before)
+ /// CHECK-DAG: <<Phi1:i\d+>> Phi loop:<<Loop1:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi2:i\d+>> Phi loop:<<Loop2:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi3:i\d+>> Phi loop:<<Loop3:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi4:i\d+>> Phi loop:<<Loop4:B\d+>> outer_loop:none
+ /// CHECK-DAG: <<Phi5:i\d+>> Phi loop:<<Loop5:B\d+>> outer_loop:none
+ /// CHECK-DAG: Return [<<Phi5>>] loop:none
+ //
+ /// CHECK-START: int Main.waterFall() loop_optimization (after)
+ /// CHECK-NOT: Phi loop:B\d+ outer_loop:none
+ /// CHECK-DAG: Return loop:none
+ private static int waterFall() {
+ int i = 0;
+ for (; i < 10; i++);
+ for (; i < 20; i++);
+ for (; i < 30; i++);
+ for (; i < 40; i++);
+ for (; i < 50; i++);
+ return i; // this should become just 50
+ }
+
private static int exceptionExitBeforeAdd() {
int k = 0;
try {
@@ -376,31 +441,32 @@
expectEquals(4, a[i]);
}
- int c = closedFormInductionUp();
- expectEquals(12395, c);
- c = closedFormInductionInAndDown(12345);
- expectEquals(12295, c);
+ expectEquals(12395, closedFormInductionUp());
+ expectEquals(12295, closedFormInductionInAndDown(12345));
+ expectEquals(10 * 10, closedFormNested());
for (int n = -4; n < 10; n++) {
int tc = (n <= 0) ? 0 : n;
- c = closedFormInductionUpN(n);
- expectEquals(12345 + tc * 5, c);
- c = closedFormInductionInAndDownN(12345, n);
- expectEquals(12345 - tc * 5, c);
- c = closedFormNested(n);
- expectEquals(tc * 10, c);
+ expectEquals(12345 + tc * 5, closedFormInductionUpN(n));
+ expectEquals(12345 - tc * 5, closedFormInductionInAndDownN(12345, n));
+ expectEquals(tc * 10, closedFormNestedN(n));
+ expectEquals(tc * tc, closedFormNestedNN(n));
}
+ expectEquals(10, mainIndexReturned());
+ expectEquals(1, periodicReturned());
+ expectEquals(21, getSum21());
for (int n = -4; n < 4; n++) {
int tc = (n <= 0) ? 0 : n;
- expectEquals(tc, mainIndexReturned(n));
- expectEquals(tc & 1, periodicReturned(n));
- expectEquals((tc * (tc + 1)) / 2, getSum(n));
+ expectEquals(tc, mainIndexReturnedN(n));
+ expectEquals(tc & 1, periodicReturnedN(n));
+ expectEquals((tc * (tc + 1)) / 2, getSumN(n));
}
- expectEquals(21, getSum21());
+
expectEquals(10, closedTwice());
expectEquals(20, closedFeed());
expectEquals(-10, closedLargeUp());
expectEquals(10, closedLargeDown());
+ expectEquals(50, waterFall());
expectEquals(100, exceptionExitBeforeAdd());
expectEquals(100, exceptionExitAfterAdd());