Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -49,6 +49,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/OrderedBasicBlock.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -473,6 +474,7 @@
   OrderedBasicBlock OBB(I->getParent());
   SmallPtrSet<Instruction *, 16> InstructionsToMove;
   SmallVector<Instruction *, 16> Worklist;
+  SimplifyQuery SQ(DL, nullptr, &DT);
 
   Worklist.push_back(I);
   while (!Worklist.empty()) {
@@ -489,6 +491,15 @@
         continue;
 
       if (!OBB.dominates(IM, I)) {
+        // If IM simplifies to another value, use that value as the operand
+        // directly instead of queueing IM to be moved. The index is stepped
+        // back before continuing so that this operand slot is examined again
+        // with its new value (assumes the enclosing loop increments i).
+        if (auto *S = SimplifyInstruction(IM, SQ.getWithInstruction(IM))) {
+          IW->setOperand(i, S);
+          i--;
+          continue;
+        }
         InstructionsToMove.insert(IM);
         Worklist.push_back(IM);
       }
Index: test/Transforms/LoadStoreVectorizer/AArch64/reorder-infinite-loop.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoadStoreVectorizer/AArch64/reorder-infinite-loop.ll
@@ -0,0 +1,27 @@
+; RUN: opt -load-store-vectorizer -S -o - %s | FileCheck %s
+target triple = "aarch64"
+
+@a = dso_local global i32 0, align 1
+@b = dso_local global [1 x [3 x i32]] zeroinitializer, align 1
+
+; arrayidx4 should have the last index replaced with 0 due to simplification,
+; to avoid the infinite loop in reorder.
+; CHECK-LABEL: @main()
+; CHECK: %arrayidx4 = getelementptr inbounds [1 x [3 x i32]], [1 x [3 x i32]]* @b, i32 0, i32 0
+
+; Function Attrs: noinline nounwind
+define dso_local i16 @main() #0 {
+entry:
+  %0 = load i32, i32* @a, align 1
+  %rem = urem i32 %0, 1
+  %arrayidx = getelementptr inbounds [1 x [3 x i32]], [1 x [3 x i32]]* @b, i32 0, i32 %rem
+  %arrayidx1 = getelementptr inbounds [3 x i32], [3 x i32]* %arrayidx, i32 0, i32 2
+  %1 = load i32, i32* %arrayidx1, align 1
+  %rem2 = urem i32 %1, 1
+  %arrayidx4 = getelementptr inbounds [1 x [3 x i32]], [1 x [3 x i32]]* @b, i32 0, i32 %rem2
+  %arrayidx5 = getelementptr inbounds [3 x i32], [3 x i32]* %arrayidx4, i32 0, i32 1
+  %2 = load i32, i32* %arrayidx5, align 1
+  ret i16 0
+}
+
+attributes #0 = { noinline nounwind }