diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1957,11 +1957,13 @@ if (!shouldMergeGEPs(*cast(&GEP), *Src)) return nullptr; - // LICM takes priority over canonicalization swapping for merging constant- - // indexed GEP, because mergable constant-indexed GEPs can still be merged - // after they are hoisted out of the loop, but performing canonicalization - // first may miss the LICM opportunity. - bool shouldCanonicalizeSwap = true; + // LICM moves a GEP with constant indices to the front, while canonicalization + // swaps it to the back of a non-constant GEP. If both transformations can be + // applied, LICM takes priority because it generally provides greater + // optimization by reducing instruction count in the loop body, but performing + // canonicalization swapping first negates the LICM opportunity while it does + // not necessarily reduce instruction count. + bool ShouldCanonicalizeSwap = true; if (Src->getResultElementType() == GEP.getSourceElementType() && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 && @@ -1973,11 +1975,10 @@ // Try to reassociate loop invariant GEP chains to enable LICM. if (Loop *L = LI->getLoopFor(GEP.getParent())) { // If SO1 is invariant and GO1 is variant, they should not be swapped by - // canonicalization, otherwise this will go into an infinite loop with - // the swapping below. - if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1)) { - shouldCanonicalizeSwap = false; - } + // canonicalization even if it can be applied, otherwise it triggers + // LICM swapping in the next iteration, causing an infinite loop. + if (!L->isLoopInvariant(GO1) && L->isLoopInvariant(SO1)) + ShouldCanonicalizeSwap = false; // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is // invariant: this breaks the dependence between GEPs and allows LICM @@ -2007,7 +2008,7 @@ // it doesn't violate def-use relations or contradict with loop invariant // swap above. This allows more potential applications of constant-indexed GEP // optimizations below. - if (shouldCanonicalizeSwap && Src->hasOneUse() && + if (ShouldCanonicalizeSwap && Src->hasOneUse() && Src->getPointerOperand()->getType() == GEP.getPointerOperand()->getType()) { // When swapping, GEP with all constant indices are more prioritized than diff --git a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll --- a/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll +++ b/llvm/test/Transforms/InstCombine/gep-canonicalize-constant-indices.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -opaque-pointers -S | FileCheck %s +; RUN: opt < %s -passes='require,instcombine' -opaque-pointers -S | FileCheck %s ; Constant-indexed GEP instructions in a chain of GEP instructions should be ; swapped to the end whenever such transformation is valid. This allows them to @@ -27,26 +27,28 @@ ; GEP with the last index being a constant should also be swapped. define ptr @partialConstant1(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @partialConstant1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] -; CHECK-NEXT: ret ptr [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[B:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [4 x i32], ptr [[TMP1]], i64 [[A:%.*]], i64 1 +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 %a, i64 1 - %2 = getelementptr inbounds i32, ptr %p, i64 %b + %2 = getelementptr inbounds i32, ptr %1, i64 %b ret ptr %2 } ; Negative test. GEP should not be swapped if the last index is not a constant. define ptr @partialConstant2(ptr %p, i64 %a, i64 %b) { ; CHECK-LABEL: @partialConstant2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[B:%.*]] -; CHECK-NEXT: ret ptr [[TMP1]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i32], ptr [[P:%.*]], i64 1, i64 [[A:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[B:%.*]] +; CHECK-NEXT: ret ptr [[TMP2]] ; %1 = getelementptr inbounds [4 x i32], ptr %p, i64 1, i64 %a - %2 = getelementptr inbounds i32, ptr %p, i64 %b + %2 = getelementptr inbounds i32, ptr %1, i64 %b ret ptr %2 } -; Constant-indexed GEP are merged after swawpping. +; Constant-indexed GEP are merged after swapping. ; result = ((i32*) p + a) + 3 define ptr @merge(ptr %p, i64 %a) { ; CHECK-LABEL: @merge(