Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -372,6 +372,8 @@ unsigned getAssumedAddrSpace(const Value *V) const; + bool isSingleThreaded() const; + std::pair getPredicatedAddrSpace(const Value *V) const; @@ -1581,6 +1583,7 @@ virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; + virtual bool isSingleThreaded() const = 0; virtual std::pair getPredicatedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, @@ -1959,6 +1962,8 @@ return Impl.getAssumedAddrSpace(V); } + bool isSingleThreaded() const override { return Impl.isSingleThreaded(); } + std::pair getPredicatedAddrSpace(const Value *V) const override { return Impl.getPredicatedAddrSpace(V); Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -108,6 +108,8 @@ unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + bool isSingleThreaded() const { return false; } + std::pair getPredicatedAddrSpace(const Value *V) const { return std::make_pair(nullptr, -1); Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -47,6 +47,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include #include #include @@ -287,6 +288,11 @@ return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + bool isSingleThreaded() const { + return getTLI()->getTargetMachine().Options.ThreadModel == + ThreadModel::Single; + } + std::pair getPredicatedAddrSpace(const Value *V) const { return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -210,8 +210,9 @@ const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, - const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *, bool AllowSpeculation); + const TargetLibraryInfo *, TargetTransformInfo *, Loop *, + MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, + bool AllowSpeculation); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -273,6 +273,10 @@ return TTIImpl->getAssumedAddrSpace(V); } +bool TargetTransformInfo::isSingleThreaded() const { + return TTIImpl->isSingleThreaded(); +} + std::pair TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const { return TTIImpl->getPredicatedAddrSpace(V); Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -76,6 +76,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -110,6 +111,10 @@ "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); +static cl::opt SingleThread("licm-force-thread-model-single", cl::Hidden, + cl::init(false), + cl::desc("Allow data races in LICM pass")); + static cl::opt MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -487,7 +492,8 @@ collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); + DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, + LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -1909,17 +1915,21 @@ if (auto *A = dyn_cast(Object)) return A->hasByValAttr(); + if (auto *G = dyn_cast(Object)) + return !G->isConstant(); + // TODO: Noalias has nothing to do with writability, this should check for // an allocator function. return isNoAliasCall(Object); } -bool isThreadLocalObject(const Value *Object, const Loop *L, - DominatorTree *DT) { +bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT, + TargetTransformInfo *TTI) { // The object must be function-local to start with, and then not captured // before/in the loop. - return isIdentifiedFunctionLocal(Object) && - isNotCapturedBeforeOrInLoop(Object, L, DT); + return (isIdentifiedFunctionLocal(Object) && + isNotCapturedBeforeOrInLoop(Object, L, DT)) || + (TTI->isSingleThreaded() || SingleThread); } } // namespace @@ -1935,9 +1945,9 @@ SmallVectorImpl &InsertPts, SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, - const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU, - ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, - bool AllowSpeculation) { + const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, + MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2147,7 +2157,8 @@ // violating the memory model. if (StoreSafety == StoreSafetyUnknown) { Value *Object = getUnderlyingObject(SomePtr); - if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT)) + if (isWritableObject(Object) && + isThreadLocalObject(Object, CurLoop, DT, TTI)) StoreSafety = StoreSafe; } Index: llvm/test/Transforms/LICM/promote-sink-store-arg.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-arg.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s --check-prefixes LICM-TMS +; RUN: opt -licm -S %s | FileCheck %s --check-prefixes LICM-NO-TMS + +define dso_local void @f(i32 noundef %n, i32 noundef %u) { +; LICM-TMS-LABEL: @f( +; LICM-TMS-NEXT: entry: +; LICM-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[U_ADDR:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: store i32 [[U:%.*]], ptr [[U_ADDR]], align 4 +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[I2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[U_ADDR_PROMOTED:%.*]] = load i32, ptr [[U_ADDR]], align 1 +; LICM-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-TMS: for.cond: +; LICM-TMS-NEXT: [[INC12:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_INC:%.*]] ], [ [[U_ADDR_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-TMS-NEXT: [[INC1]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I2]] +; LICM-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-TMS: for.cond.cleanup: +; LICM-TMS-NEXT: [[INC12_LCSSA:%.*]] = phi i32 [ [[INC12]], [[FOR_COND]] ] +; LICM-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-TMS-NEXT: store i32 [[INC12_LCSSA]], ptr [[U_ADDR]], align 1 +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-TMS: for.body: +; LICM-TMS-NEXT: br label [[FOR_INC]] +; LICM-TMS: for.inc: +; LICM-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-TMS-NEXT: br label [[FOR_COND]] +; LICM-TMS: for.end: +; LICM-TMS-NEXT: ret void +; +; LICM-NO-TMS-LABEL: @f( +; LICM-NO-TMS-NEXT: entry: +; LICM-NO-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[U_ADDR:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[U:%.*]], ptr [[U_ADDR]], align 4 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[I2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[U_ADDR_PROMOTED:%.*]] = load i32, ptr [[U_ADDR]], align 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-NO-TMS: for.cond: +; LICM-NO-TMS-NEXT: [[INC12:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_INC:%.*]] ], [ [[U_ADDR_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-NO-TMS-NEXT: [[INC1]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-NO-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I2]] +; LICM-NO-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-NO-TMS: for.cond.cleanup: +; LICM-NO-TMS-NEXT: [[INC12_LCSSA:%.*]] = phi i32 [ [[INC12]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[INC12_LCSSA]], ptr [[U_ADDR]], align 1 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-NO-TMS: for.body: +; LICM-NO-TMS-NEXT: br label [[FOR_INC]] +; LICM-NO-TMS: for.inc: +; LICM-NO-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND]] +; LICM-NO-TMS: for.end: +; LICM-NO-TMS-NEXT: ret void +; +entry: + %n.addr = alloca i32, align 4 + %u.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + store i32 %u, ptr %u.addr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1 = load i32, ptr %i, align 4 + %i2 = load i32, ptr %n.addr, align 4 + %cmp = icmp slt i32 %i1, %i2 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) + br label %for.end + +for.body: ; preds = %for.cond + %i3 = load i32, ptr %i, align 4 + store i32 %i3, ptr %u.addr, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %i4 = load i32, ptr %i, align 4 + %inc = add nsw i32 %i4, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + ret void +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) Index: llvm/test/Transforms/LICM/promote-sink-store-capture.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-capture.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s --check-prefixes LICM-TMS +; RUN: opt -licm -S %s | FileCheck %s --check-prefixes LICM-NO-TMS + +define dso_local void @f(i32 noundef %n, i32 noundef %u) { +; LICM-TMS-LABEL: @f( +; LICM-TMS-NEXT: entry: +; LICM-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[U_ADDR:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[X:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: store i32 [[U:%.*]], ptr [[U_ADDR]], align 4 +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; LICM-TMS-NEXT: [[I1:%.*]] = load i32, ptr [[U_ADDR]], align 4 +; LICM-TMS-NEXT: store i32 [[I1]], ptr [[X]], align 4 +; LICM-TMS-NEXT: [[I3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr [[X]], align 1 +; LICM-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-TMS: for.cond: +; LICM-TMS-NEXT: [[INC12:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_INC:%.*]] ], [ [[X_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-TMS-NEXT: [[INC1]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I3]] +; LICM-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-TMS: for.cond.cleanup: +; LICM-TMS-NEXT: [[INC12_LCSSA:%.*]] = phi i32 [ [[INC12]], [[FOR_COND]] ] +; LICM-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-TMS-NEXT: store i32 [[INC12_LCSSA]], ptr [[X]], align 1 +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-TMS: for.body: +; LICM-TMS-NEXT: br label [[FOR_INC]] +; LICM-TMS: for.inc: +; LICM-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-TMS-NEXT: br label [[FOR_COND]] +; LICM-TMS: for.end: +; LICM-TMS-NEXT: ret void +; +; LICM-NO-TMS-LABEL: @f( +; LICM-NO-TMS-NEXT: entry: +; LICM-NO-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[U_ADDR:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[X:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[U:%.*]], ptr [[U_ADDR]], align 4 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; LICM-NO-TMS-NEXT: [[I1:%.*]] = load i32, ptr [[U_ADDR]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[I1]], ptr [[X]], align 4 +; LICM-NO-TMS-NEXT: [[I3:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr [[X]], align 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-NO-TMS: for.cond: +; LICM-NO-TMS-NEXT: [[INC12:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_INC:%.*]] ], [ [[X_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-NO-TMS-NEXT: [[INC1]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-NO-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I3]] +; LICM-NO-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-NO-TMS: for.cond.cleanup: +; LICM-NO-TMS-NEXT: [[INC12_LCSSA:%.*]] = phi i32 [ [[INC12]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[INC12_LCSSA]], ptr [[X]], align 1 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-NO-TMS: for.body: +; LICM-NO-TMS-NEXT: br label [[FOR_INC]] +; LICM-NO-TMS: for.inc: +; LICM-NO-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND]] +; LICM-NO-TMS: for.end: +; LICM-NO-TMS-NEXT: ret void +; +entry: + %n.addr = alloca i32, align 4 + %u.addr = alloca i32, align 4 + %i = alloca i32, align 4 + %x = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + store i32 %u, ptr %u.addr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %i) + store i32 0, ptr %i, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %x) + %i1 = load i32, ptr %u.addr, align 4 + store i32 %i1, ptr %x, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i2 = load i32, ptr %i, align 4 + %i3 = load i32, ptr %n.addr, align 4 + %cmp = icmp slt i32 %i2, %i3 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %x) + call void @llvm.lifetime.end.p0(i64 4, ptr %i) + br label %for.end + +for.body: ; preds = %for.cond + %i4 = load i32, ptr %i, align 4 + store i32 %i4, ptr %x, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %i5 = load i32, ptr %i, align 4 + %inc = add nsw i32 %i5, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + ret void +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) Index: llvm/test/Transforms/LICM/promote-sink-store-constant-global.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-constant-global.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s --check-prefixes LICM-TMS +; RUN: opt -licm -S %s | FileCheck %s --check-prefixes LICM-NO-TMS + +@u = dso_local constant i32 7, align 4 + +define dso_local void @f(i32 noundef %n) { +; LICM-TMS-LABEL: @f( +; LICM-TMS-NEXT: entry: +; LICM-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[X:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[I2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr [[X]], align 1 +; LICM-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-TMS: for.cond: +; LICM-TMS-NEXT: [[TMP0:%.*]] = phi i32 [ 7, [[FOR_INC:%.*]] ], [ [[X_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-TMS-NEXT: [[INC1:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I2]] +; LICM-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-TMS: for.cond.cleanup: +; LICM-TMS-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[FOR_COND]] ] +; LICM-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-TMS-NEXT: store i32 [[DOTLCSSA]], ptr [[X]], align 1 +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-TMS: for.body: +; LICM-TMS-NEXT: br label [[FOR_INC]] +; LICM-TMS: for.inc: +; LICM-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-TMS-NEXT: br label [[FOR_COND]] +; LICM-TMS: for.end: +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; LICM-TMS-NEXT: ret void +; +; LICM-NO-TMS-LABEL: @f( +; LICM-NO-TMS-NEXT: entry: +; LICM-NO-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[X:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[I2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[X_PROMOTED:%.*]] = load i32, ptr [[X]], align 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-NO-TMS: for.cond: +; LICM-NO-TMS-NEXT: [[TMP0:%.*]] = phi i32 [ 7, [[FOR_INC:%.*]] ], [ [[X_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-NO-TMS-NEXT: [[INC1:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-NO-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I2]] +; LICM-NO-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-NO-TMS: for.cond.cleanup: +; LICM-NO-TMS-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP0]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[DOTLCSSA]], ptr [[X]], align 1 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-NO-TMS: for.body: +; LICM-NO-TMS-NEXT: br label [[FOR_INC]] +; LICM-NO-TMS: for.inc: +; LICM-NO-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND]] +; LICM-NO-TMS: for.end: +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; LICM-NO-TMS-NEXT: ret void +; +entry: + %n.addr = alloca i32, align 4 + %x = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %x) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1 = load i32, ptr %i, align 4 + %i2 = load i32, ptr %n.addr, align 4 + %cmp = icmp slt i32 %i1, %i2 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) + br label %for.end + +for.body: ; preds = %for.cond + store i32 7, ptr %x, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %i3 = load i32, ptr %i, align 4 + %inc = add nsw i32 %i3, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + call void @llvm.lifetime.end.p0(i64 4, ptr %x) + ret void +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) Index: llvm/test/Transforms/LICM/promote-sink-store-global.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-global.ll @@ -0,0 +1,126 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s --check-prefixes LICM-TMS +; RUN: opt -licm -S %s | FileCheck %s --check-prefixes LICM-NO-TMS + +@u = dso_local global i32 0, align 4 +@v = dso_local global i32 0, align 4 + +define dso_local void @f(i32 noundef %n) { +; LICM-TMS-LABEL: @f( +; LICM-TMS-NEXT: entry: +; LICM-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[A:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A]]) +; LICM-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[I2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-TMS-NEXT: [[I4:%.*]] = load i32, ptr @v, align 4 +; LICM-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-TMS-NEXT: [[U_PROMOTED:%.*]] = load i32, ptr @u, align 1 +; LICM-TMS-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1 +; LICM-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-TMS: for.cond: +; LICM-TMS-NEXT: [[I43:%.*]] = phi i32 [ [[I4]], [[FOR_INC:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-TMS-NEXT: [[INC12:%.*]] = phi i32 [ [[INC1:%.*]], [[FOR_INC]] ], [ [[U_PROMOTED]], [[ENTRY]] ] +; LICM-TMS-NEXT: [[INC1]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I2]] +; LICM-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-TMS: for.cond.cleanup: +; LICM-TMS-NEXT: [[I43_LCSSA:%.*]] = phi i32 [ [[I43]], [[FOR_COND]] ] +; LICM-TMS-NEXT: [[INC12_LCSSA:%.*]] = phi i32 [ [[INC12]], [[FOR_COND]] ] +; LICM-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-TMS-NEXT: store i32 [[INC12_LCSSA]], ptr @u, align 1 +; LICM-TMS-NEXT: store i32 [[I43_LCSSA]], ptr [[A]], align 1 +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-TMS: for.body: +; LICM-TMS-NEXT: br label [[FOR_INC]] +; LICM-TMS: for.inc: +; LICM-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-TMS-NEXT: br label [[FOR_COND]] +; LICM-TMS: for.end: +; LICM-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[A]]) +; LICM-TMS-NEXT: ret void +; +; LICM-NO-TMS-LABEL: @f( +; LICM-NO-TMS-NEXT: entry: +; LICM-NO-TMS-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[A:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: [[I:%.*]] = alloca i32, align 4 +; LICM-NO-TMS-NEXT: store i32 [[N:%.*]], ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A]]) +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: store i32 0, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[I2:%.*]] = load i32, ptr [[N_ADDR]], align 4 +; LICM-NO-TMS-NEXT: [[I4:%.*]] = load i32, ptr @v, align 4 +; LICM-NO-TMS-NEXT: [[I_PROMOTED:%.*]] = load i32, ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: [[A_PROMOTED:%.*]] = load i32, ptr [[A]], align 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND:%.*]] +; LICM-NO-TMS: for.cond: +; LICM-NO-TMS-NEXT: [[I42:%.*]] = phi i32 [ [[I4]], [[FOR_INC:%.*]] ], [ [[A_PROMOTED]], [[ENTRY:%.*]] ] +; LICM-NO-TMS-NEXT: [[INC1:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC]] ], [ [[I_PROMOTED]], [[ENTRY]] ] +; LICM-NO-TMS-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC1]], [[I2]] +; LICM-NO-TMS-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; LICM-NO-TMS: for.cond.cleanup: +; LICM-NO-TMS-NEXT: [[I42_LCSSA:%.*]] = phi i32 [ [[I42]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: [[INC1_LCSSA:%.*]] = phi i32 [ [[INC1]], [[FOR_COND]] ] +; LICM-NO-TMS-NEXT: store i32 [[INC1_LCSSA]], ptr [[I]], align 4 +; LICM-NO-TMS-NEXT: store i32 [[I42_LCSSA]], ptr [[A]], align 1 +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) +; LICM-NO-TMS-NEXT: br label [[FOR_END:%.*]] +; LICM-NO-TMS: for.body: +; LICM-NO-TMS-NEXT: store i32 [[INC1]], ptr @u, align 4 +; LICM-NO-TMS-NEXT: br label [[FOR_INC]] +; LICM-NO-TMS: for.inc: +; LICM-NO-TMS-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; LICM-NO-TMS-NEXT: br label [[FOR_COND]] +; LICM-NO-TMS: for.end: +; LICM-NO-TMS-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[A]]) +; LICM-NO-TMS-NEXT: ret void +; +entry: + %n.addr = alloca i32, align 4 + %a = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %a) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) + store i32 0, ptr %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i1 = load i32, ptr %i, align 4 + %i2 = load i32, ptr %n.addr, align 4 + %cmp = icmp slt i32 %i1, %i2 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) + br label %for.end + +for.body: ; preds = %for.cond + %i3 = load i32, ptr %i, align 4 + store i32 %i3, ptr @u, align 4 + %i4 = load i32, ptr @v, align 4 + store i32 %i4, ptr %a, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %i5 = load i32, ptr %i, align 4 + %inc = add nsw i32 %i5, 1 + store i32 %inc, ptr %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + call void @llvm.lifetime.end.p0(i64 4, ptr %a) + ret void +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) Index: llvm/test/Transforms/LICM/promote-sink-store.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s --check-prefixes LICM-TMS +; RUN: opt -licm -S %s | FileCheck %s --check-prefixes LICM-NO-TMS + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local void @f(ptr noalias nocapture noundef readonly %arg, ptr noalias nocapture noundef readonly %arg1, i32 noundef %arg2) local_unnamed_addr { +; LICM-TMS-LABEL: @f( +; LICM-TMS-NEXT: bb: +; LICM-TMS-NEXT: [[I:%.*]] = icmp sgt i32 [[ARG2:%.*]], 0 +; LICM-TMS-NEXT: br i1 [[I]], label [[BB3:%.*]], label [[BB26:%.*]] +; LICM-TMS: bb3: +; LICM-TMS-NEXT: [[I4:%.*]] = load i32, ptr @v, align 4 +; LICM-TMS-NEXT: [[I5:%.*]] = load i32, ptr @u, align 4 +; LICM-TMS-NEXT: [[I6:%.*]] = zext i32 [[ARG2]] to i64 +; LICM-TMS-NEXT: [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1 +; LICM-TMS-NEXT: br label [[BB7:%.*]] +; LICM-TMS: bb7: +; LICM-TMS-NEXT: [[I203:%.*]] = phi i32 [ [[V_PROMOTED]], [[BB3]] ], [ [[I202:%.*]], [[BB21:%.*]] ] +; LICM-TMS-NEXT: [[I8:%.*]] = phi i64 [ 0, [[BB3]] ], [ [[I23:%.*]], [[BB21]] ] +; LICM-TMS-NEXT: [[I9:%.*]] = phi i32 [ [[I5]], [[BB3]] ], [ [[I14:%.*]], [[BB21]] ] +; LICM-TMS-NEXT: [[I10:%.*]] = phi i32 [ [[I4]], [[BB3]] ], [ [[I22:%.*]], [[BB21]] ] +; LICM-TMS-NEXT: [[I11:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[I8]] +; LICM-TMS-NEXT: [[I12:%.*]] = load i32, ptr [[I11]], align 4 +; LICM-TMS-NEXT: [[I13:%.*]] = icmp eq i32 [[I12]], 0 +; LICM-TMS-NEXT: [[I14]] = add nsw i32 [[I9]], 1 +; LICM-TMS-NEXT: br i1 [[I13]], label [[BB15:%.*]], label [[BB25:%.*]] +; LICM-TMS: bb15: +; LICM-TMS-NEXT: [[I16:%.*]] = getelementptr inbounds i32, ptr [[ARG1:%.*]], i64 [[I8]] +; LICM-TMS-NEXT: [[I17:%.*]] = load i32, ptr [[I16]], align 4 +; LICM-TMS-NEXT: [[I18:%.*]] = icmp eq i32 [[I17]], 0 +; LICM-TMS-NEXT: br i1 [[I18]], label [[BB21]], label [[BB19:%.*]] +; LICM-TMS: bb19: +; LICM-TMS-NEXT: [[I20:%.*]] = add nsw i32 [[I10]], 1 +; LICM-TMS-NEXT: br label [[BB21]] +; LICM-TMS: bb21: +; LICM-TMS-NEXT: [[I202]] = phi i32 [ [[I203]], [[BB15]] ], [ [[I20]], [[BB19]] ] +; LICM-TMS-NEXT: [[I22]] = phi i32 [ [[I10]], [[BB15]] ], [ [[I20]], [[BB19]] ] +; LICM-TMS-NEXT: [[I23]] = add nuw nsw i64 [[I8]], 1 +; LICM-TMS-NEXT: [[I24:%.*]] = icmp eq i64 [[I23]], [[I6]] +; LICM-TMS-NEXT: br i1 [[I24]], label [[BB25]], label [[BB7]] +; LICM-TMS: bb25: +; LICM-TMS-NEXT: [[I201:%.*]] = phi i32 [ [[I202]], [[BB21]] ], [ [[I203]], [[BB7]] ] +; LICM-TMS-NEXT: [[I14_LCSSA:%.*]] = phi i32 [ [[I14]], [[BB21]] ], [ [[I14]], [[BB7]] ] +; LICM-TMS-NEXT: store i32 [[I201]], ptr @v, align 1 +; LICM-TMS-NEXT: store i32 [[I14_LCSSA]], ptr @u, align 4 +; LICM-TMS-NEXT: br label [[BB26]] +; LICM-TMS: bb26: +; LICM-TMS-NEXT: ret void +; +; LICM-NO-TMS-LABEL: @f( +; LICM-NO-TMS-NEXT: bb: +; LICM-NO-TMS-NEXT: [[I:%.*]] = icmp sgt i32 [[ARG2:%.*]], 0 +; LICM-NO-TMS-NEXT: br i1 [[I]], label [[BB3:%.*]], label [[BB26:%.*]] +; LICM-NO-TMS: bb3: +; LICM-NO-TMS-NEXT: [[I4:%.*]] = load i32, ptr @v, align 4 +; LICM-NO-TMS-NEXT: [[I5:%.*]] = load i32, ptr @u, align 4 +; LICM-NO-TMS-NEXT: [[I6:%.*]] = zext i32 [[ARG2]] to i64 +; LICM-NO-TMS-NEXT: br label [[BB7:%.*]] +; LICM-NO-TMS: bb7: +; LICM-NO-TMS-NEXT: [[I8:%.*]] = phi i64 [ 0, [[BB3]] ], [ [[I23:%.*]], [[BB21:%.*]] ] +; LICM-NO-TMS-NEXT: [[I9:%.*]] = phi i32 [ [[I5]], [[BB3]] ], [ [[I14:%.*]], [[BB21]] ] +; LICM-NO-TMS-NEXT: [[I10:%.*]] = phi i32 [ [[I4]], [[BB3]] ], [ [[I22:%.*]], [[BB21]] ] +; LICM-NO-TMS-NEXT: [[I11:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[I8]] +; LICM-NO-TMS-NEXT: [[I12:%.*]] = load i32, ptr [[I11]], align 4 +; LICM-NO-TMS-NEXT: [[I13:%.*]] = icmp eq i32 [[I12]], 0 +; LICM-NO-TMS-NEXT: [[I14]] = add nsw i32 [[I9]], 1 +; LICM-NO-TMS-NEXT: br i1 [[I13]], label [[BB15:%.*]], label [[BB25:%.*]] +; LICM-NO-TMS: bb15: +; LICM-NO-TMS-NEXT: [[I16:%.*]] = getelementptr inbounds i32, ptr [[ARG1:%.*]], i64 [[I8]] +; LICM-NO-TMS-NEXT: [[I17:%.*]] = load i32, ptr [[I16]], align 4 +; LICM-NO-TMS-NEXT: [[I18:%.*]] = icmp eq i32 [[I17]], 0 +; LICM-NO-TMS-NEXT: br i1 [[I18]], label [[BB21]], label [[BB19:%.*]] +; LICM-NO-TMS: bb19: +; LICM-NO-TMS-NEXT: [[I20:%.*]] = add nsw i32 [[I10]], 1 +; LICM-NO-TMS-NEXT: store i32 [[I20]], ptr @v, align 4 +; LICM-NO-TMS-NEXT: br label [[BB21]] +; LICM-NO-TMS: bb21: +; LICM-NO-TMS-NEXT: [[I22]] = phi i32 [ [[I10]], [[BB15]] ], [ [[I20]], [[BB19]] ] +; LICM-NO-TMS-NEXT: [[I23]] = add nuw nsw i64 [[I8]], 1 +; LICM-NO-TMS-NEXT: [[I24:%.*]] = icmp eq i64 [[I23]], [[I6]] +; LICM-NO-TMS-NEXT: br i1 [[I24]], label [[BB25]], label [[BB7]] +; LICM-NO-TMS: bb25: +; LICM-NO-TMS-NEXT: [[I14_LCSSA:%.*]] = phi i32 [ [[I14]], [[BB21]] ], [ [[I14]], [[BB7]] ] +; LICM-NO-TMS-NEXT: store i32 [[I14_LCSSA]], ptr @u, align 4 +; LICM-NO-TMS-NEXT: br label [[BB26]] +; LICM-NO-TMS: bb26: +; LICM-NO-TMS-NEXT: ret void +; +bb: + %i = icmp sgt i32 %arg2, 0 + br i1 %i, label %bb3, label %bb26 + +bb3: ; preds = %bb + %i4 = load i32, ptr @v, align 4 + %i5 = load i32, ptr @u, align 4 + %i6 = zext i32 %arg2 to i64 + br label %bb7 + +bb7: ; preds = %bb21, %bb3 + %i8 = phi i64 [ 0, %bb3 ], [ %i23, %bb21 ] + %i9 = phi i32 [ %i5, %bb3 ], [ %i14, %bb21 ] + %i10 = phi i32 [ %i4, %bb3 ], [ %i22, %bb21 ] + %i11 = getelementptr inbounds i32, ptr %arg, i64 %i8 + %i12 = load i32, ptr %i11, align 4 + %i13 = icmp eq i32 %i12, 0 + %i14 = add nsw i32 %i9, 1 + br i1 %i13, label %bb15, label %bb25 + +bb15: ; preds = %bb7 + %i16 = getelementptr inbounds i32, ptr %arg1, i64 %i8 + %i17 = load i32, ptr %i16, align 4 + %i18 = icmp eq i32 %i17, 0 + br i1 %i18, label %bb21, label %bb19 + +bb19: ; preds = %bb15 + %i20 = add nsw i32 %i10, 1 + store i32 %i20, ptr @v, align 4 + br label %bb21 + +bb21: ; preds = %bb19, %bb15 + %i22 = phi i32 [ %i10, %bb15 ], [ %i20, %bb19 ] + %i23 = add nuw nsw i64 %i8, 1 + %i24 = icmp eq i64 %i23, %i6 + br i1 %i24, label %bb25, label %bb7 + +bb25: ; preds = %bb21, %bb7 + store i32 %i14, ptr @u, align 4 + br label %bb26 + +bb26: ; preds = %bb25, %bb + ret void +}