diff --git a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h --- a/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h @@ -16,6 +16,7 @@ #define LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H #include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopNestAnalysis.h" #include "llvm/IR/PassManager.h" namespace llvm { @@ -43,6 +44,13 @@ LoopStandardAnalysisResults &AR, LPMUpdater &U); }; +// LoopNestPass counterpart of LoopIdiomRecognizePass (NFC w.r.t. the LoopPass) +class LoopNestIdiomRecognizePass : public PassInfoMixin<LoopNestIdiomRecognizePass> { +public: + PreservedAnalyses run(LoopNest &LN, LoopAnalysisManager &LAM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -392,6 +392,7 @@ LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) LOOP_PASS("loop-interchange", LoopInterchangePass()) +LOOP_PASS("loop-nest-idiom", LoopNestIdiomRecognizePass()) LOOP_PASS("loop-rotate", LoopRotatePass()) LOOP_PASS("no-op-loop", NoOpLoopPass()) LOOP_PASS("print", PrintLoopPass(dbgs())) diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -148,6 +148,7 @@ class LoopIdiomRecognize { Loop *CurLoop = nullptr; + LoopNest *LN; AliasAnalysis *AA; DominatorTree *DT; LoopInfo *LI; @@ -161,16 +162,17 @@ public: explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, - LoopInfo *LI, ScalarEvolution *SE, + LoopInfo *LI, LoopNest *LN, ScalarEvolution *SE, TargetLibraryInfo *TLI, const 
TargetTransformInfo *TTI, MemorySSA *MSSA, const DataLayout *DL, OptimizationRemarkEmitter &ORE) - : AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) { + : LN(LN), AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) { if (MSSA) MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); } + bool runOnLoopNest(); bool runOnLoop(Loop *L); private: @@ -292,7 +294,7 @@ // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L->getHeader()->getParent()); - LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE); + LoopIdiomRecognize LIR(AA, DT, LI, nullptr, SE, TLI, TTI, MSSA, DL, ORE); return LIR.runOnLoop(L); } @@ -323,7 +325,7 @@ // but ORE cannot be preserved (see comment before the pass definition). OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, nullptr, &AR.SE, &AR.TLI, &AR.TTI, AR.MSSA, DL, ORE); if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); @@ -334,6 +336,28 @@ return PA; } +PreservedAnalyses LoopNestIdiomRecognizePass::run(LoopNest &LN, LoopAnalysisManager &LAM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + if (DisableLIRP::All) + return PreservedAnalyses::all(); + + const auto *DL = &LN.getOutermostLoop().getHeader()->getModule()->getDataLayout(); + OptimizationRemarkEmitter ORE(LN.getOutermostLoop().getHeader()->getParent()); + + LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &LN, &AR.SE, &AR.TLI, &AR.TTI, + AR.MSSA, DL, ORE); + + if (!LIR.runOnLoopNest()) + return PreservedAnalyses::all(); + + auto PA = getLoopPassPreservedAnalyses(); + if (AR.MSSA) + PA.preserve<MemorySSAAnalysis>(); + + return PA; +} + INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom", "Recognize loop idioms", false, false) INITIALIZE_PASS_DEPENDENCY(LoopPass) @@ -355,6 +379,27 @@ // //===----------------------------------------------------------------------===// +bool 
LoopIdiomRecognize::runOnLoopNest() { + assert(LN != nullptr && "should not call this function if LoopNest isn't " + "provided in ctor"); + + // Build worklist that contains all loops inside the LoopNest + SmallVector<Loop *, 8> WorkList; + for (Loop *L : LN->getLoops()) { + WorkList.push_back(L); + } + + std::reverse(WorkList.begin(), WorkList.end()); + + // Iterate from innermost to outermost + bool Changed = false; + for (Loop *L : WorkList) { + Changed |= runOnLoop(L); + } + + return Changed; +} + bool LoopIdiomRecognize::runOnLoop(Loop *L) { CurLoop = L; // If the loop could not be converted to canonical form, it must have an diff --git a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll --- a/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll +++ b/llvm/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=amdgcn-- < %s | FileCheck %s ; Mostly copied from x86 version. diff --git a/llvm/test/Transforms/LoopIdiom/ARM/ctlz.ll b/llvm/test/Transforms/LoopIdiom/ARM/ctlz.ll --- a/llvm/test/Transforms/LoopIdiom/ARM/ctlz.ll +++ b/llvm/test/Transforms/LoopIdiom/ARM/ctlz.ll @@ -1,5 +1,7 @@ ; RUN: opt -loop-idiom -mtriple=armv7a < %s -S | FileCheck -check-prefix=LZCNT --check-prefix=ALL %s ; RUN: opt -loop-idiom -mtriple=armv4t < %s -S | FileCheck -check-prefix=NOLZCNT --check-prefix=ALL %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=armv7a < %s | FileCheck %s -check-prefix=LZCNT --check-prefix=ALL +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=armv4t < %s | FileCheck %s -check-prefix=NOLZCNT --check-prefix=ALL ; Recognize CTLZ builtin pattern. 
; Here we'll just convert loop to countable, diff --git a/llvm/test/Transforms/LoopIdiom/RISCV/popcnt.ll b/llvm/test/Transforms/LoopIdiom/RISCV/popcnt.ll --- a/llvm/test/Transforms/LoopIdiom/RISCV/popcnt.ll +++ b/llvm/test/Transforms/LoopIdiom/RISCV/popcnt.ll @@ -5,6 +5,12 @@ ; RUN: opt -loop-idiom -mtriple=riscv64 -mattr=+experimental-b -S < %s | FileCheck %s --check-prefixes=CPOP ; RUN: opt -loop-idiom -mtriple=riscv32 -S < %s | FileCheck %s --check-prefixes=NOCPOP ; RUN: opt -loop-idiom -mtriple=riscv64 -S < %s | FileCheck %s --check-prefixes=NOCPOP +; RUN: opt -passes="loop-nest-idiom" -mtriple=riscv32 -mattr=+experimental-zbb -S < %s | FileCheck %s --check-prefixes=CPOP +; RUN: opt -passes="loop-nest-idiom" -mtriple=riscv64 -mattr=+experimental-zbb -S < %s | FileCheck %s --check-prefixes=CPOP +; RUN: opt -passes="loop-nest-idiom" -mtriple=riscv32 -mattr=+experimental-b -S < %s | FileCheck %s --check-prefixes=CPOP +; RUN: opt -passes="loop-nest-idiom" -mtriple=riscv64 -mattr=+experimental-b -S < %s | FileCheck %s --check-prefixes=CPOP +; RUN: opt -passes="loop-nest-idiom" -mtriple=riscv32 -S < %s | FileCheck %s --check-prefixes=NOCPOP +; RUN: opt -passes="loop-nest-idiom" -mtriple=riscv64 -S < %s | FileCheck %s --check-prefixes=NOCPOP ; Mostly copied from AMDGPU version. 
diff --git a/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll --- a/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=core-avx2 < %s | FileCheck %s declare void @escape_inner(i8, i8, i8, i1, i8) declare void @escape_outer(i8, i8, i8, i1, i8) diff --git a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll --- a/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/ctlz.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s -check-prefixes=ALL,LZCNT ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck %s -check-prefixes=ALL,NOLZCNT +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=core-avx2 < %s | FileCheck %s -check-prefixes=ALL,LZCNT +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=corei7 < %s | FileCheck %s -check-prefixes=ALL,NOLZCNT ; Recognize CTLZ builtin pattern. 
; Here we'll just convert loop to countable, diff --git a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll --- a/llvm/test/Transforms/LoopIdiom/X86/cttz.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/cttz.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck --check-prefix=ALL %s ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck --check-prefix=ALL %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=core-avx2 < %s | FileCheck %s --check-prefix=ALL +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=corei7 < %s | FileCheck %s --check-prefix=ALL ; Recognize CTTZ builtin pattern. ; Here it will replace the loop - diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -debugify -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck --check-prefixes=ALL,LZCNT %s ; RUN: opt -debugify -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck --check-prefixes=ALL,NOLZCNT %s +; RUN: opt -passes="loop-nest-idiom" -enable-debugify -debug-pass-manager -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck --check-prefixes=ALL,LZCNT %s +; RUN: opt -passes="loop-nest-idiom" -enable-debugify -debug-pass-manager -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck --check-prefixes=ALL,NOLZCNT %s declare i32 @gen32() declare void @use32(i32) diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-zero.ll --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-zero.ll +++ 
b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-zero.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=core-avx2 < %s | FileCheck %s declare void @escape_inner(i8, i8, i8, i1, i8) declare void @escape_outer(i8, i8, i8, i1, i8) diff --git a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-cost.ll b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-cost.ll --- a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-cost.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-cost.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=corei7 < %s -S | FileCheck %s - +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=corei7 < %s | FileCheck %s declare void @escape_inner(i8, i8, i8, i1, i8) declare void @escape_outer(i8, i8, i8, i1, i8) diff --git a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll --- a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -debugify -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -enable-debugify -debug-pass-manager -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s declare void @escape_inner(i8, i8, i8, i1, i8) declare void @escape_outer(i8, i8, i8, i1, i8) diff --git a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero.ll --- 
a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -mtriple=x86_64 -mcpu=core-avx2 < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64 -mcpu=core-avx2 < %s | FileCheck %s declare void @escape_inner(i8, i8, i8, i1, i8) declare void @escape_outer(i8, i8, i8, i1, i8) diff --git a/llvm/test/Transforms/LoopIdiom/X86/popcnt.ll b/llvm/test/Transforms/LoopIdiom/X86/popcnt.ll --- a/llvm/test/Transforms/LoopIdiom/X86/popcnt.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/popcnt.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=x86_64-apple-darwin -mcpu=corei7 < %s | FileCheck %s target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll b/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll --- a/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/unordered-atomic-memcpy.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll --- a/llvm/test/Transforms/LoopIdiom/basic-address-space.ll +++ b/llvm/test/Transforms/LoopIdiom/basic-address-space.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = 
"e-p:32:32:32-p1:64:64:64-p2:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll --- a/llvm/test/Transforms/LoopIdiom/basic.ll +++ b/llvm/test/Transforms/LoopIdiom/basic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" < %s -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" ; For @test11_pattern diff --git a/llvm/test/Transforms/LoopIdiom/crash.ll b/llvm/test/Transforms/LoopIdiom/crash.ll --- a/llvm/test/Transforms/LoopIdiom/crash.ll +++ b/llvm/test/Transforms/LoopIdiom/crash.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom -S < %s +; RUN: opt -passes="loop-nest-idiom" -S < %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" diff --git a/llvm/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll b/llvm/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll --- a/llvm/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll +++ b/llvm/test/Transforms/LoopIdiom/ctpop-multiple-users-crash.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom -S < %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-ios8.0.0" diff --git a/llvm/test/Transforms/LoopIdiom/dbginfo-cost.ll b/llvm/test/Transforms/LoopIdiom/dbginfo-cost.ll --- a/llvm/test/Transforms/LoopIdiom/dbginfo-cost.ll +++ b/llvm/test/Transforms/LoopIdiom/dbginfo-cost.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-idiom -mtriple=systemz-unknown 
-mcpu=z13 %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S -mtriple=systemz-unknown -mcpu=z13 < %s | FileCheck %s ; CHECK: @llvm.ctlz.i32 diff --git a/llvm/test/Transforms/LoopIdiom/debug-line.ll b/llvm/test/Transforms/LoopIdiom/debug-line.ll --- a/llvm/test/Transforms/LoopIdiom/debug-line.ll +++ b/llvm/test/Transforms/LoopIdiom/debug-line.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Transforms/LoopIdiom/disable-options.ll b/llvm/test/Transforms/LoopIdiom/disable-options.ll --- a/llvm/test/Transforms/LoopIdiom/disable-options.ll +++ b/llvm/test/Transforms/LoopIdiom/disable-options.ll @@ -9,6 +9,11 @@ ; RUN: opt -passes="loop-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-memcpy < %s -S | FileCheck %s --check-prefix=DIS-MEMCPY ; RUN: opt -passes="loop-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-memset < %s -S | FileCheck %s --check-prefix=DIS-MEMSET ; RUN: opt -passes="loop-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-memset -disable-loop-idiom-memcpy < %s -S | FileCheck %s --check-prefix=DIS-ALL +; RUN: opt -passes="loop-nest-idiom" -aa-pipeline=basic-aa < %s -S | FileCheck %s --check-prefix=DIS-NONE +; RUN: opt -passes="loop-nest-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-all < %s -S | FileCheck %s --check-prefix=DIS-ALL +; RUN: opt -passes="loop-nest-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-memcpy < %s -S | FileCheck %s --check-prefix=DIS-MEMCPY +; RUN: opt -passes="loop-nest-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-memset < %s -S | FileCheck %s --check-prefix=DIS-MEMSET +; RUN: opt -passes="loop-nest-idiom" -aa-pipeline=basic-aa -disable-loop-idiom-memset -disable-loop-idiom-memcpy < %s -S | FileCheck %s 
--check-prefix=DIS-ALL define void @test-memcpy(i64 %Size) nounwind ssp { ; DIS-NONE-LABEL: @test-memcpy( diff --git a/llvm/test/Transforms/LoopIdiom/expander-do-not-delete-reused-values.ll b/llvm/test/Transforms/LoopIdiom/expander-do-not-delete-reused-values.ll --- a/llvm/test/Transforms/LoopIdiom/expander-do-not-delete-reused-values.ll +++ b/llvm/test/Transforms/LoopIdiom/expander-do-not-delete-reused-values.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -S %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s ; Make sure we do not delete instructions not inserted during expansion, e.g. ; because the expande re-used existing instructions. diff --git a/llvm/test/Transforms/LoopIdiom/int_sideeffect.ll b/llvm/test/Transforms/LoopIdiom/int_sideeffect.ll --- a/llvm/test/Transforms/LoopIdiom/int_sideeffect.ll +++ b/llvm/test/Transforms/LoopIdiom/int_sideeffect.ll @@ -1,4 +1,5 @@ ; RUN: opt -S < %s -loop-idiom | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s declare void @llvm.sideeffect() diff --git a/llvm/test/Transforms/LoopIdiom/lir-heurs-multi-block-loop.ll b/llvm/test/Transforms/LoopIdiom/lir-heurs-multi-block-loop.ll --- a/llvm/test/Transforms/LoopIdiom/lir-heurs-multi-block-loop.ll +++ b/llvm/test/Transforms/LoopIdiom/lir-heurs-multi-block-loop.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom -use-lir-code-size-heurs=true < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -use-lir-code-size-heurs=true -S < %s | FileCheck %s ; When compiling for codesize we avoid idiom recognition for a ; multi-block loop unless it is one of diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll --- a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -enable-debugify -debug-pass-manager -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom < %s -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll --- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic-different-types.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s ; #include ; diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll --- a/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-intrinsic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s %struct.S = type { i32, i32, i8 } diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll b/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll --- a/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-vectors.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom -S <%s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s define void @memcpy_fixed_vec(i64* noalias %a, i64* noalias %b) local_unnamed_addr #1 { ; CHECK-LABEL: @memcpy_fixed_vec( diff --git a/llvm/test/Transforms/LoopIdiom/memcpy.ll 
b/llvm/test/Transforms/LoopIdiom/memcpy.ll --- a/llvm/test/Transforms/LoopIdiom/memcpy.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s define void @copy_both_noalias(float* noalias nocapture %d, float* noalias nocapture readonly %s, i64 %sz) { ; CHECK-LABEL: @copy_both_noalias( diff --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll --- a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -enable-debugify -debug-pass-manager -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom < %s -S 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/memset.ll b/llvm/test/Transforms/LoopIdiom/memset.ll --- a/llvm/test/Transforms/LoopIdiom/memset.ll +++ b/llvm/test/Transforms/LoopIdiom/memset.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" < %s -S | FileCheck %s define dso_local void @double_memset(i8* nocapture %p, i8* noalias nocapture %q, i32 %n) { diff --git a/llvm/test/Transforms/LoopIdiom/memset_noidiom.ll b/llvm/test/Transforms/LoopIdiom/memset_noidiom.ll --- a/llvm/test/Transforms/LoopIdiom/memset_noidiom.ll +++ b/llvm/test/Transforms/LoopIdiom/memset_noidiom.ll 
@@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Transforms/LoopIdiom/non-canonical-loop.ll b/llvm/test/Transforms/LoopIdiom/non-canonical-loop.ll --- a/llvm/test/Transforms/LoopIdiom/non-canonical-loop.ll +++ b/llvm/test/Transforms/LoopIdiom/non-canonical-loop.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -loop-idiom < %s +; RUN: opt -S --passes="loop-nest-idiom" < %s ; Don't crash ; PR13892 diff --git a/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll b/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll --- a/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll +++ b/llvm/test/Transforms/LoopIdiom/non-integral-pointers.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -basic-aa -loop-idiom < %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll b/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll --- a/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll +++ b/llvm/test/Transforms/LoopIdiom/nontemporal_store.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s ; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,require<opt-remark-emit>,loop(loop-idiom)' < %s -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/phi-insertion.ll b/llvm/test/Transforms/LoopIdiom/phi-insertion.ll --- a/llvm/test/Transforms/LoopIdiom/phi-insertion.ll +++ b/llvm/test/Transforms/LoopIdiom/phi-insertion.ll @@ -1,5 +1,6 @@ ; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -S %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s define void @phi_insertion(i1 %c, i32* %ptr.start, i32* %ptr.end, i64 %offset) { ; CHECK-LABEL: @phi_insertion( diff --git a/llvm/test/Transforms/LoopIdiom/pr28196.ll b/llvm/test/Transforms/LoopIdiom/pr28196.ll --- a/llvm/test/Transforms/LoopIdiom/pr28196.ll +++ b/llvm/test/Transforms/LoopIdiom/pr28196.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom -S < %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/pr33114.ll b/llvm/test/Transforms/LoopIdiom/pr33114.ll --- a/llvm/test/Transforms/LoopIdiom/pr33114.ll +++ b/llvm/test/Transforms/LoopIdiom/pr33114.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Check that we're not crashing while looking at the recurrence variable. 
; RUN: opt -S -loop-idiom %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s define void @tinkywinky() { ; CHECK-LABEL: @tinkywinky( diff --git a/llvm/test/Transforms/LoopIdiom/reuse-cast.ll b/llvm/test/Transforms/LoopIdiom/reuse-cast.ll --- a/llvm/test/Transforms/LoopIdiom/reuse-cast.ll +++ b/llvm/test/Transforms/LoopIdiom/reuse-cast.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -loop-idiom -S %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s define void @reuse_cast_1(float** %ptr, i1 %c) { ; CHECK-LABEL: @reuse_cast_1( diff --git a/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll b/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll --- a/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll +++ b/llvm/test/Transforms/LoopIdiom/scev-invalidation.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -indvars -loop-idiom < %s +; RUN: opt -passes="loop-nest-idiom,indvars" -S < %s ; PR14214 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/scev-invalidation_topmostloop.ll b/llvm/test/Transforms/LoopIdiom/scev-invalidation_topmostloop.ll --- a/llvm/test/Transforms/LoopIdiom/scev-invalidation_topmostloop.ll +++ b/llvm/test/Transforms/LoopIdiom/scev-invalidation_topmostloop.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -indvars -loop-idiom -verify -loop-simplifycfg -simplifycfg-require-and-preserve-domtree=1 -loop-idiom < %s | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LoopIdiom/struct-custom-dl.ll b/llvm/test/Transforms/LoopIdiom/struct-custom-dl.ll --- a/llvm/test/Transforms/LoopIdiom/struct-custom-dl.ll +++ b/llvm/test/Transforms/LoopIdiom/struct-custom-dl.ll @@ -1,4 +1,5 @@ ; 
RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:40:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" %struct.foo = type { i32, i32 } diff --git a/llvm/test/Transforms/LoopIdiom/struct.ll b/llvm/test/Transforms/LoopIdiom/struct.ll --- a/llvm/test/Transforms/LoopIdiom/struct.ll +++ b/llvm/test/Transforms/LoopIdiom/struct.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" diff --git a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll --- a/llvm/test/Transforms/LoopIdiom/struct_pattern.ll +++ b/llvm/test/Transforms/LoopIdiom/struct_pattern.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" ; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 diff --git a/llvm/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll b/llvm/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll --- a/llvm/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll +++ b/llvm/test/Transforms/LoopIdiom/unordered-atomic-memcpy-noarch.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" ;; memcpy.atomic formation (atomic load & store) -- element size 2 diff --git a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll --- a/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll +++ b/llvm/test/Transforms/LoopIdiom/unroll-custom-dl.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" ; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 diff --git a/llvm/test/Transforms/LoopIdiom/unroll.ll b/llvm/test/Transforms/LoopIdiom/unroll.ll --- a/llvm/test/Transforms/LoopIdiom/unroll.ll +++ b/llvm/test/Transforms/LoopIdiom/unroll.ll @@ -1,4 +1,5 @@ ; RUN: opt -basic-aa -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" ; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 diff --git a/llvm/test/Transforms/LoopIdiom/unsafe.ll b/llvm/test/Transforms/LoopIdiom/unsafe.ll --- a/llvm/test/Transforms/LoopIdiom/unsafe.ll +++ b/llvm/test/Transforms/LoopIdiom/unsafe.ll @@ -1,4 +1,5 @@ ; RUN: opt -S < %s -loop-idiom | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s ; CHECK-NOT: memset ; check that memset is not generated (for stores) because that will result ; in udiv hoisted out of the loop by the SCEV Expander diff --git a/llvm/test/Transforms/LoopIdiom/unwind.ll 
b/llvm/test/Transforms/LoopIdiom/unwind.ll --- a/llvm/test/Transforms/LoopIdiom/unwind.ll +++ b/llvm/test/Transforms/LoopIdiom/unwind.ll @@ -1,4 +1,5 @@ ; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -passes="loop-nest-idiom" -S < %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"