diff --git a/llvm/include/llvm/CodeGen/HardwareLoops.h b/llvm/include/llvm/CodeGen/HardwareLoops.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/HardwareLoops.h
@@ -0,0 +1,78 @@
+//===- HardwareLoops.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Defines an IR pass for the creation of hardware loops.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_HARDWARELOOPS_H
+#define LLVM_CODEGEN_HARDWARELOOPS_H
+
+#include "llvm/IR/PassManager.h"
+#include <optional>
+
+namespace llvm {
+
+/// Overrides for the hardware-loops transformation. Every field is unset
+/// until its setter is called. Read the boolean fields through the get*()
+/// accessors: converting a std::optional<bool> to bool only tells whether a
+/// value was set, not what that value is.
+struct HardwareLoopOptions {
+  std::optional<unsigned> Decrement;
+  std::optional<unsigned> Bitwidth;
+  std::optional<bool> Force;
+  std::optional<bool> ForcePhi;
+  std::optional<bool> ForceNested;
+  std::optional<bool> ForceGuard;
+
+  HardwareLoopOptions &setDecrement(unsigned Count) {
+    Decrement = Count;
+    return *this;
+  }
+  HardwareLoopOptions &setCounterBitwidth(unsigned Width) {
+    Bitwidth = Width;
+    return *this;
+  }
+  HardwareLoopOptions &setForce(bool Force) {
+    this->Force = Force;
+    return *this;
+  }
+  HardwareLoopOptions &setForcePhi(bool Force) {
+    ForcePhi = Force;
+    return *this;
+  }
+  HardwareLoopOptions &setForceNested(bool Force) {
+    ForceNested = Force;
+    return *this;
+  }
+  HardwareLoopOptions &setForceGuard(bool Force) {
+    ForceGuard = Force;
+    return *this;
+  }
+  /// The getters below treat an unset option the same as false.
+  bool getForce() const { return Force.value_or(false); }
+  bool getForcePhi() const { return ForcePhi.value_or(false); }
+  bool getForceNested() const { return ForceNested.value_or(false); }
+  bool getForceGuard() const { return ForceGuard.value_or(false); }
+};
+
+/// New pass manager wrapper around the hardware-loops transformation.
+class HardwareLoopsPass : public PassInfoMixin<HardwareLoopsPass> {
+  HardwareLoopOptions Opts;
+
+public:
+  explicit HardwareLoopsPass(HardwareLoopOptions Opts = {})
+    : Opts(Opts) { }
+
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_HARDWARELOOPS_H
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -542,7 +542,7 @@
   FunctionPass *createEHContGuardCatchretPass();
 
   /// Create Hardware Loop pass. \see HardwareLoops.cpp
-  FunctionPass *createHardwareLoopsPass();
+  FunctionPass *createHardwareLoopsLegacyPass();
 
   /// This pass inserts pseudo probe annotation for callsite profiling.
   FunctionPass *createPseudoProbeInserter();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -153,7 +153,7 @@
 void initializeGlobalSplitPass(PassRegistry&);
 void initializeGlobalsAAWrapperPassPass(PassRegistry&);
 void initializeGuardWideningLegacyPassPass(PassRegistry&);
-void initializeHardwareLoopsPass(PassRegistry&);
+void initializeHardwareLoopsLegacyPass(PassRegistry&);
 void initializeMIRProfileLoaderPassPass(PassRegistry &);
 void initializeIPSCCPLegacyPassPass(PassRegistry&);
 void initializeIRCELegacyPassPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -197,7 +197,7 @@
       (void) llvm::createFloat2IntPass();
       (void) llvm::createEliminateAvailableExternallyPass();
       (void)llvm::createScalarizeMaskedMemIntrinLegacyPass();
-      (void) llvm::createHardwareLoopsPass();
+      (void) llvm::createHardwareLoopsLegacyPass();
       (void) llvm::createInjectTLIMappingsLegacyPass();
       (void) llvm::createUnifyLoopExitsPass();
       (void) llvm::createFixIrreduciblePass();
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ 
b/llvm/lib/CodeGen/CodeGen.cpp
@@ -48,7 +48,7 @@
   initializeFuncletLayoutPass(Registry);
   initializeGCMachineCodeAnalysisPass(Registry);
   initializeGCModuleInfoPass(Registry);
-  initializeHardwareLoopsPass(Registry);
+  initializeHardwareLoopsLegacyPass(Registry);
   initializeIfConverterPass(Registry);
   initializeImplicitNullChecksPass(Registry);
   initializeIndirectBrExpandPassPass(Registry);
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -15,8 +15,10 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/HardwareLoops.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -115,12 +117,12 @@
 
   using TTI = TargetTransformInfo;
 
-  class HardwareLoops : public FunctionPass {
+  class HardwareLoopsLegacy : public FunctionPass {
   public:
     static char ID;
 
-    HardwareLoops() : FunctionPass(ID) {
-      initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+    HardwareLoopsLegacy() : FunctionPass(ID) {
+      initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
     }
 
     bool runOnFunction(Function &F) override;
@@ -131,29 +133,47 @@
       AU.addRequired<DominatorTreeWrapperPass>();
       AU.addPreserved<DominatorTreeWrapperPass>();
       AU.addRequired<ScalarEvolutionWrapperPass>();
+      AU.addPreserved<ScalarEvolutionWrapperPass>();
       AU.addRequired<AssumptionCacheTracker>();
       AU.addRequired<TargetTransformInfoWrapperPass>();
       AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+      AU.addPreserved<BranchProbabilityInfoWrapperPass>();
     }
+  };
+
+  // Pass-manager independent implementation shared by the legacy and the new
+  // pass manager wrappers. It only borrows the analyses and options it is
+  // given, so they must outlive it.
+  class HardwareLoopsImpl {
+  public:
+    HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
+                      DominatorTree &DT, const DataLayout &DL,
+                      const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
+                      AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
+                      HardwareLoopOptions &Opts)
+      : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
+        TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
 
+    bool run(Function &F);
+
+  private:
     // Try to convert the given Loop into a hardware loop.
-    bool TryConvertLoop(Loop *L);
+    bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
 
     // Given that the target believes the loop to be profitable, try to
     // convert it.
     bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
 
-  private:
-    ScalarEvolution *SE = nullptr;
-    LoopInfo *LI = nullptr;
-    const DataLayout *DL = nullptr;
-    OptimizationRemarkEmitter *ORE = nullptr;
-    const TargetTransformInfo *TTI = nullptr;
-    DominatorTree *DT = nullptr;
-    bool PreserveLCSSA = false;
-    AssumptionCache *AC = nullptr;
-    TargetLibraryInfo *LibInfo = nullptr;
-    Module *M = nullptr;
+    ScalarEvolution &SE;
+    LoopInfo &LI;
+    bool PreserveLCSSA;
+    DominatorTree &DT;
+    const DataLayout &DL;
+    const TargetTransformInfo &TTI;
+    TargetLibraryInfo *TLI = nullptr;
+    AssumptionCache &AC;
+    OptimizationRemarkEmitter *ORE;
+    HardwareLoopOptions &Opts;
     bool MadeChange = false;
   };
 
@@ -182,8 +202,9 @@
   public:
     HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
                  const DataLayout &DL,
-                 OptimizationRemarkEmitter *ORE) :
-      SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
+                 OptimizationRemarkEmitter *ORE,
+                 HardwareLoopOptions &Opts) :
+      SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
       ExitCount(Info.ExitCount),
       CountType(Info.CountType),
       ExitBranch(Info.ExitBranch),
@@ -197,6 +218,7 @@
     ScalarEvolution &SE;
     const DataLayout &DL;
     OptimizationRemarkEmitter *ORE = nullptr;
+    HardwareLoopOptions &Opts;
     Loop *L = nullptr;
     Module *M = nullptr;
     const SCEV *ExitCount = nullptr;
@@ -209,40 +231,87 @@
   };
 }
 
-char HardwareLoops::ID = 0;
+char HardwareLoopsLegacy::ID = 0;
 
-bool HardwareLoops::runOnFunction(Function &F) {
+bool HardwareLoopsLegacy::runOnFunction(Function &F) {
   if (skipFunction(F))
     return false;
 
   LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
 
-  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-  DL = &F.getParent()->getDataLayout();
-  ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  auto &DL = F.getParent()->getDataLayout();
+  auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
-  LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
-  PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  M = F.getParent();
+  auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+  // Forward only the flags that were actually given on the command line so
+  // that every other option stays unset.
+  HardwareLoopOptions Opts;
+  if (ForceHardwareLoops.getNumOccurrences())
+    Opts.setForce(ForceHardwareLoops);
+  if (ForceHardwareLoopPHI.getNumOccurrences())
+    Opts.setForcePhi(ForceHardwareLoopPHI);
+  if (ForceNestedLoop.getNumOccurrences())
+    Opts.setForceNested(ForceNestedLoop);
+  if (ForceGuardLoopEntry.getNumOccurrences())
+    Opts.setForceGuard(ForceGuardLoopEntry);
+  if (LoopDecrement.getNumOccurrences())
+    Opts.setDecrement(LoopDecrement);
+  if (CounterBitWidth.getNumOccurrences())
+    Opts.setCounterBitwidth(CounterBitWidth);
 
-  for (Loop *L : *LI)
-    if (L->isOutermost())
-      TryConvertLoop(L);
+  HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
+                         Opts);
+  return Impl.run(F);
+}
+
+// New pass manager entry point. LCSSA preservation is always requested
+// (PreserveLCSSA is passed as true).
+PreservedAnalyses HardwareLoopsPass::run(Function &F,
+                                         FunctionAnalysisManager &AM) {
+  auto &LI = AM.getResult<LoopAnalysis>(F);
+  auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+  auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+  auto &DL = F.getParent()->getDataLayout();
+
+  HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
+  bool Changed = Impl.run(F);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<BranchProbabilityAnalysis>();
+  return PA;
+}
 
+bool HardwareLoopsImpl::run(Function &F) {
+  LLVMContext &Ctx = F.getParent()->getContext();
+  for (Loop *L : LI)
+    if (L->isOutermost())
+      TryConvertLoop(L, Ctx);
   return MadeChange;
 }
 
 // Return true if the search should stop, which will be when an inner loop is
 // converted and the parent loop doesn't support containing a hardware loop.
-bool HardwareLoops::TryConvertLoop(Loop *L) {
+bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
   // Process nested loops first.
   bool AnyChanged = false;
   for (Loop *SL : *L)
-    AnyChanged |= TryConvertLoop(SL);
+    AnyChanged |= TryConvertLoop(SL, Ctx);
   if (AnyChanged) {
     reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
                         ORE, L);
@@ -252,39 +321,41 @@
   LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
 
   HardwareLoopInfo HWLoopInfo(L);
-  if (!HWLoopInfo.canAnalyze(*LI)) {
+  if (!HWLoopInfo.canAnalyze(LI)) {
     reportHWLoopFailure("cannot analyze loop, irreducible control flow",
                         "HWLoopCannotAnalyze", ORE, L);
     return false;
   }
 
-  if (!ForceHardwareLoops &&
-      !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+  // Read the optional's value, not its presence: an explicit "false" must
+  // still consult the target's profitability hook.
+  if (!Opts.Force.value_or(false) &&
+      !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
     reportHWLoopFailure("it's not profitable to create a hardware-loop",
                         "HWLoopNotProfitable", ORE, L);
     return false;
   }
 
   // Allow overriding of the counter width and loop decrement value.
-  if (CounterBitWidth.getNumOccurrences())
-    HWLoopInfo.CountType =
-      IntegerType::get(M->getContext(), CounterBitWidth);
+  if (Opts.Bitwidth.has_value()) {
+    HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
+  }
 
-  if (LoopDecrement.getNumOccurrences())
+  if (Opts.Decrement.has_value())
     HWLoopInfo.LoopDecrement =
-      ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+      ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
 
   MadeChange |= TryConvertLoop(HWLoopInfo);
-  return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+  return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.getForceNested());
 }
 
-bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
 
   Loop *L = HWLoopInfo.L;
   LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
 
-  if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
-                                          ForceHardwareLoopPHI)) {
+  if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
+                                          Opts.getForcePhi())) {
     // TODO: there can be many reasons a loop is not considered a
     // candidate, so we should let isHardwareLoopCandidate fill in the
     // reason and then report a better message here.
@@ -300,11 +371,11 @@
 
   // If we don't have a preheader, then insert one.
   if (!Preheader)
-    Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+    Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
   if (!Preheader)
     return false;
 
-  HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
+  HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
   HWLoop.Create();
   ++NumHWLoops;
   return true;
@@ -322,7 +393,7 @@
 
   Value *Setup = InsertIterationSetup(LoopCountInit);
 
-  if (UsePHICounter || ForceHardwareLoopPHI) {
+  if (UsePHICounter || Opts.getForcePhi()) {
     Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
     Value *EltsRem = InsertPHICounter(Setup, LoopDec);
     LoopDec->setOperand(0, EltsRem);
@@ -397,7 +468,8 @@
   if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
                                   SE.getZero(ExitCount->getType()))) {
     LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
-    UseLoopGuard |= ForceGuardLoopEntry;
+    if (Opts.getForceGuard())
+      UseLoopGuard = true;
   } else
     UseLoopGuard = false;
 
@@ -441,7 +513,7 @@
 Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
   IRBuilder<> Builder(BeginBB->getTerminator());
   Type *Ty = LoopCountInit->getType();
-  bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+  bool UsePhi = UsePHICounter || Opts.getForcePhi();
   Intrinsic::ID ID = UseLoopGuard
     ? (UsePhi ? Intrinsic::test_start_loop_iterations
               : Intrinsic::test_set_loop_iterations)
@@ -533,11 +605,11 @@
   RecursivelyDeleteTriviallyDeadInstructions(OldCond);
 }
 
-INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
 
-FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
+FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -73,6 +73,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/HardwareLoops.h"
 #include "llvm/CodeGen/TypePromotion.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Dominators.h"
@@ -540,6 +541,48 @@
   return Result;
 }
 
+/// Parser of parameters for HardwareLoops pass.
+Expected<HardwareLoopOptions> parseHardwareLoopOptions(StringRef Params) {
+  HardwareLoopOptions HardwareLoopOpts;
+  // Every malformed or unknown parameter is reported with the same error.
+  auto InvalidParam = [](StringRef Param) {
+    return make_error<StringError>(
+        formatv("invalid HardwareLoopPass parameter '{0}' ", Param).str(),
+        inconvertibleErrorCode());
+  };
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    if (ParamName.consume_front("hardware-loop-decrement=")) {
+      // Parse as unsigned so a negative value is an error, not a wrap-around.
+      unsigned Count;
+      if (ParamName.getAsInteger(0, Count))
+        return InvalidParam(ParamName);
+      HardwareLoopOpts.setDecrement(Count);
+      continue;
+    }
+    if (ParamName.consume_front("hardware-loop-counter-bitwidth=")) {
+      unsigned Width;
+      if (ParamName.getAsInteger(0, Width))
+        return InvalidParam(ParamName);
+      HardwareLoopOpts.setCounterBitwidth(Width);
+      continue;
+    }
+    if (ParamName == "force-hardware-loops") {
+      HardwareLoopOpts.setForce(true);
+    } else if (ParamName == "force-hardware-loop-phi") {
+      HardwareLoopOpts.setForcePhi(true);
+    } else if (ParamName == "force-nested-hardware-loop") {
+      HardwareLoopOpts.setForceNested(true);
+    } else if (ParamName == "force-hardware-loop-guard") {
+      HardwareLoopOpts.setForceGuard(true);
+    } else {
+      return InvalidParam(ParamName);
+    }
+  }
+  return HardwareLoopOpts;
+}
+
 /// Parser of parameters for LoopUnroll pass.
Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
   LoopUnrollOptions UnrollOpts;
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -424,6 +424,18 @@
                            },
                            parseEntryExitInstrumenterPassOptions,
                            "post-inline")
+FUNCTION_PASS_WITH_PARAMS("hardware-loops",
+                          "HardwareLoopsPass",
+                           [](HardwareLoopOptions Opts) {
+                             return HardwareLoopsPass(Opts);
+                           },
+                          parseHardwareLoopOptions,
+                          "force-hardware-loops;"
+                          "force-hardware-loop-phi;"
+                          "force-nested-hardware-loop;"
+                          "force-hardware-loop-guard;"
+                          "hardware-loop-decrement=N;"
+                          "hardware-loop-counter-bitwidth=N")
 FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
                           "LowerMatrixIntrinsicsPass",
                            [](bool Minimal) {
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -481,7 +481,7 @@
   }
 
   if (TM->getOptLevel() != CodeGenOpt::None) {
-    addPass(createHardwareLoopsPass());
+    addPass(createHardwareLoopsLegacyPass());
     addPass(createMVETailPredicationPass());
     // FIXME: IR passes can delete address-taken basic blocks, deleting
     // corresponding blockaddresses. 
ARMConstantPoolConstant holds references to
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -474,7 +474,7 @@
     addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
 
   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
-    addPass(createHardwareLoopsPass());
+    addPass(createHardwareLoopsLegacyPass());
 
   return false;
 }
diff --git a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
--- a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
+++ b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -S -verify-loop-lcssa %s | FileCheck %s
+; RUN: opt < %s -passes=hardware-loops -verify-loop-lcssa -S | FileCheck %s
 
 target datalayout = "E-m:e-i64:64-n32:64"
 target triple = "ppc64-unknown-linux-elf"
@@ -20,11 +20,12 @@
 ; CHECK-NEXT:    [[C_0:%.*]] = call i1 @cond()
 ; CHECK-NEXT:    br i1 [[C_0]], label [[WHILE_COND25_PREHEADER:%.*]], label [[FOR_BODY]]
 ; CHECK:       while.cond25.preheader:
+; CHECK-NEXT:    [[INDVARS_IV349_PH:%.*]] = phi i64 [ 50, [[FOR_INC]] ]
 ; CHECK-NEXT:    call void @llvm.set.loop.iterations.i64(i64 51)
 ; CHECK-NEXT:    br label [[WHILE_COND25:%.*]]
 ; CHECK:       while.cond25:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ 0, [[WHILE_COND25_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ]
-; CHECK-NEXT:    [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[WHILE_COND25_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ [[INDVARS_IV349_PH]], [[WHILE_COND25_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[LAND_RHS]], label [[WHILE_END187:%.*]]
 ; CHECK:       land.rhs:
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/calls.ll b/llvm/test/Transforms/HardwareLoops/ARM/calls.ll
--- a/llvm/test/Transforms/HardwareLoops/ARM/calls.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/calls.ll
@@ -1,9 +1,9 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
 
 ; DISABLED-NOT: call i32 @llvm.loop.decrement
 
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/counter.ll b/llvm/test/Transforms/HardwareLoops/ARM/counter.ll
--- a/llvm/test/Transforms/HardwareLoops/ARM/counter.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/counter.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -o - | FileCheck %s
 
 @g = common local_unnamed_addr global ptr null, align 4
 
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll b/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
--- a/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/do-rem.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
 
 @g = common local_unnamed_addr global ptr null, align 4
 
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll b/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
--- a/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/fp-emulation.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
 
 ; CHECK-LABEL: test_fptosi
 ; CHECK-SOFT-NOT: call i32 @llvm.start.loop.iterations
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
--- a/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/simple-do.ll
@@ -1,5 +1,5 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC
 
 ; DISABLED-NOT: llvm.{{.*}}.loop.iterations
diff --git a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
--- a/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
+++ b/llvm/test/Transforms/HardwareLoops/ARM/structure.ll
@@ -1,8 +1,8 @@
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | \
 ; RUN:     FileCheck %s
 ; RUN: opt -mtriple=thumbv8.1m.main -passes=loop-unroll -unroll-remainder=false -S < %s | \
 ; RUN:     llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops \
 ; RUN:     -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
 ; RUN:     FileCheck %s --check-prefix=CHECK-REMARKS
 
@@ -14,7 +14,7 @@
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
-; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
+; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 ; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
 
diff --git a/llvm/test/Transforms/HardwareLoops/loop-guards.ll b/llvm/test/Transforms/HardwareLoops/loop-guards.ll
--- a/llvm/test/Transforms/HardwareLoops/loop-guards.ll
+++ b/llvm/test/Transforms/HardwareLoops/loop-guards.ll
@@ -1,6 +1,6 @@
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=false -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
 
 ; NO-GUARD-NOT: @llvm.test.set.loop.iterations
 
diff --git a/llvm/test/Transforms/HardwareLoops/scalar-while.ll b/llvm/test/Transforms/HardwareLoops/scalar-while.ll
--- a/llvm/test/Transforms/HardwareLoops/scalar-while.ll
+++ b/llvm/test/Transforms/HardwareLoops/scalar-while.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-nested-hardware-loop>' -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
 
 define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) {
 ; CHECK-DEC-LABEL: @while_lt(
diff --git a/llvm/test/Transforms/HardwareLoops/sibling-loops.ll b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
--- a/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
+++ b/llvm/test/Transforms/HardwareLoops/sibling-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S | FileCheck %s
+; RUN: opt < %s -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S | FileCheck %s
 
 define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
 ; CHECK-LABEL: @test(
@@ -25,10 +25,11 @@
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1)
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[FOR_BODY4_US]], label [[FOR_BODY15_US_PREHEADER:%.*]]
 ; CHECK:       for.body15.us.preheader:
+; CHECK-NEXT:    [[J10_055_US_PH:%.*]] = phi i32 [ 0, [[FOR_BODY4_US]] ]
 ; CHECK-NEXT:    call void @llvm.set.loop.iterations.i32(i32 [[N]])
 ; CHECK-NEXT:    br label [[FOR_BODY15_US:%.*]]
 ; CHECK:       for.body15.us:
-; CHECK-NEXT:    [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ 0, [[FOR_BODY15_US_PREHEADER]] ]
+; CHECK-NEXT:    [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ [[J10_055_US_PH]], [[FOR_BODY15_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX16_US:%.*]] = getelementptr inbounds i16, ptr [[OFF]], i32 [[J10_055_US]]
 ; CHECK-NEXT:    [[L0:%.*]] = load i16, ptr [[ARRAYIDX16_US]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX18_US:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i32 [[J10_055_US]]
diff --git a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
--- a/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
+++ b/llvm/test/Transforms/HardwareLoops/unconditional-latch.ll
@@ -1,6 +1,6 @@
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
 
 ; CHECK-LABEL: not_rotated
 ; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations
diff --git a/llvm/test/Transforms/HardwareLoops/unscevable.ll b/llvm/test/Transforms/HardwareLoops/unscevable.ll
--- a/llvm/test/Transforms/HardwareLoops/unscevable.ll
+++ b/llvm/test/Transforms/HardwareLoops/unscevable.ll
@@ -1,6 +1,6 @@
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-nested-hardware-loop>' -S %s -o - | FileCheck %s
 
 ; CHECK-LABEL: float_counter
 ; CHECK-NOT: set.loop.iterations
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -366,7 +366,7 @@
   initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
   initializeExpandReductionsPass(*Registry);
   initializeExpandVectorPredicationPass(*Registry);
-  initializeHardwareLoopsPass(*Registry);
+  initializeHardwareLoopsLegacyPass(*Registry);
   initializeTransformUtils(*Registry);
   initializeReplaceWithVeclibLegacyPass(*Registry);
   initializeTLSVariableHoistLegacyPassPass(*Registry);
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -368,7 +368,6 @@
       "verify-safepoint-ir",
       "atomic-expand",
       "expandvp",
-      "hardware-loops",
       "mve-tail-predication",
       "interleaved-access",
       "global-merge",
@@ -462,7 +461,6 @@
   initializeExpandVectorPredicationPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWriteBitcodePassPass(Registry);
-  initializeHardwareLoopsPass(Registry);
   initializeReplaceWithVeclibLegacyPass(Registry);
   initializeJMCInstrumenterPass(Registry);
 
diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
--- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
+++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/JIT.cpp
@@ -113,7 +113,7 @@
   initializeExpandVectorPredicationPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWriteBitcodePassPass(Registry);
-  initializeHardwareLoopsPass(Registry);
+  initializeHardwareLoopsLegacyPass(Registry);
   initializeTypePromotionLegacyPass(Registry);
   initializeReplaceWithVeclibLegacyPass(Registry);
   initializeJMCInstrumenterPass(Registry);