Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -372,6 +372,8 @@ unsigned getAssumedAddrSpace(const Value *V) const; + bool isSingleThreaded() const; + std::pair getPredicatedAddrSpace(const Value *V) const; @@ -1574,6 +1576,7 @@ virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; + virtual bool isSingleThreaded() const = 0; virtual std::pair getPredicatedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, @@ -1952,6 +1955,8 @@ return Impl.getAssumedAddrSpace(V); } + bool isSingleThreaded() const override { return Impl.isSingleThreaded(); } + std::pair getPredicatedAddrSpace(const Value *V) const override { return Impl.getPredicatedAddrSpace(V); Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -108,6 +108,8 @@ unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + bool isSingleThreaded() const { return false; } + std::pair getPredicatedAddrSpace(const Value *V) const { return std::make_pair(nullptr, -1); Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -47,6 +47,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include #include #include @@ -287,6 +288,11 @@ return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + bool isSingleThreaded() const { + return 
getTLI()->getTargetMachine().Options.ThreadModel == + ThreadModel::Single; + } + std::pair getPredicatedAddrSpace(const Value *V) const { return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -210,8 +210,9 @@ const SmallSetVector &, SmallVectorImpl &, SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, - const TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *, bool AllowSpeculation); + const TargetLibraryInfo *, TargetTransformInfo *, Loop *, + MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, + bool AllowSpeculation); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. 
Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -273,6 +273,10 @@ return TTIImpl->getAssumedAddrSpace(V); } +bool TargetTransformInfo::isSingleThreaded() const { + return TTIImpl->isSingleThreaded(); +} + std::pair TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const { return TTIImpl->getPredicatedAddrSpace(V); Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -76,6 +76,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -110,6 +111,10 @@ "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); +static cl::opt SingleThread("licm-force-thread-model-single", cl::Hidden, + cl::init(false), + cl::desc("Allow data races in LICM pass")); + static cl::opt MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -487,7 +492,8 @@ collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, AC, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); + DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, + LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -1909,17 +1915,21 @@ if (auto *A = dyn_cast(Object)) return A->hasByValAttr(); + if (auto *G = dyn_cast(Object)) + return !G->isConstant(); + // TODO: Noalias has nothing to do with writability, this should 
check for // an allocator function. return isNoAliasCall(Object); } -bool isThreadLocalObject(const Value *Object, const Loop *L, - DominatorTree *DT) { +bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT, + TargetTransformInfo *TTI) { // The object must be function-local to start with, and then not captured // before/in the loop. - return isIdentifiedFunctionLocal(Object) && - isNotCapturedBeforeOrInLoop(Object, L, DT); + return (isIdentifiedFunctionLocal(Object) && + isNotCapturedBeforeOrInLoop(Object, L, DT)) || + (TTI->isSingleThreaded() || SingleThread); } } // namespace @@ -1935,9 +1945,9 @@ SmallVectorImpl &InsertPts, SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, - const TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU, - ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, - bool AllowSpeculation) { + const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, + MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, + OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2147,7 +2157,8 @@ // violating the memory model. 
if (StoreSafety == StoreSafetyUnknown) { Value *Object = getUnderlyingObject(SomePtr); - if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT)) + if (isWritableObject(Object) && + isThreadLocalObject(Object, CurLoop, DT, TTI)) StoreSafety = StoreSafe; } Index: llvm/test/Transforms/LICM/promote-sink-store-arg.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-arg.ll @@ -0,0 +1,164 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s + +; Function Attrs: nounwind uwtable +define dso_local void @f(ptr noalias noundef %0, ptr noalias noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4) { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP6:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP8:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP10:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP11:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP12:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[TMP0:%.*]], ptr [[TMP6]], align 8 +; CHECK-NEXT: store ptr [[TMP1:%.*]], ptr [[TMP7]], align 8 +; CHECK-NEXT: store i32 [[TMP2:%.*]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 [[TMP3:%.*]], ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 [[TMP4:%.*]], ptr [[TMP10]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP11]]) +; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP7]], align 8 +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-NEXT: [[DOTPROMOTED2:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[DOTPROMOTED5:%.*]] = load i32, ptr [[TMP10]], align 4 +; 
CHECK-NEXT: br label [[TMP16:%.*]] +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP39:%.*]], [[TMP40:%.*]] ], [ [[DOTPROMOTED5]], [[TMP5:%.*]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ [[TMP31:%.*]], [[TMP40]] ], [ [[DOTPROMOTED2]], [[TMP5]] ] +; CHECK-NEXT: [[TMP19:%.*]] = phi i32 [ [[TMP41:%.*]], [[TMP40]] ], [ [[DOTPROMOTED]], [[TMP5]] ] +; CHECK-NEXT: [[TMP20:%.*]] = icmp slt i32 [[TMP19]], [[TMP13]] +; CHECK-NEXT: br i1 [[TMP20]], label [[TMP22:%.*]], label [[TMP21:%.*]] +; CHECK: 21: +; CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i32 [ [[TMP17]], [[TMP16]] ] +; CHECK-NEXT: [[DOTLCSSA3:%.*]] = phi i32 [ [[TMP18]], [[TMP16]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP19]], [[TMP16]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr [[TMP11]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA3]], ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA6]], ptr [[TMP10]], align 4 +; CHECK-NEXT: store i32 2, ptr [[TMP12]], align 4 +; CHECK-NEXT: br label [[TMP42:%.*]] +; CHECK: 22: +; CHECK-NEXT: [[TMP23:%.*]] = sext i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[TMP23]] +; CHECK-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +; CHECK-NEXT: br i1 [[TMP26]], label [[TMP27:%.*]], label [[TMP30:%.*]] +; CHECK: 27: +; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi i32 [ [[TMP17]], [[TMP22]] ] +; CHECK-NEXT: [[DOTLCSSA4:%.*]] = phi i32 [ [[TMP18]], [[TMP22]] ] +; CHECK-NEXT: [[DOTLCSSA1:%.*]] = phi i32 [ [[TMP19]], [[TMP22]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA1]], ptr [[TMP11]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA4]], ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA7]], ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = add nsw i32 [[TMP28]], 1 +; CHECK-NEXT: store i32 [[TMP29]], ptr [[TMP9]], align 4 +; CHECK-NEXT: store i32 2, ptr [[TMP12]], align 4 +; CHECK-NEXT: br label 
[[TMP42]] +; CHECK: 30: +; CHECK-NEXT: [[TMP31]] = add nsw i32 [[TMP18]], 1 +; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP19]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i64 [[TMP32]] +; CHECK-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP33]], align 4 +; CHECK-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +; CHECK-NEXT: br i1 [[TMP35]], label [[TMP36:%.*]], label [[TMP38:%.*]] +; CHECK: 36: +; CHECK-NEXT: [[TMP37:%.*]] = add nsw i32 [[TMP17]], 1 +; CHECK-NEXT: br label [[TMP38]] +; CHECK: 38: +; CHECK-NEXT: [[TMP39]] = phi i32 [ [[TMP37]], [[TMP36]] ], [ [[TMP17]], [[TMP30]] ] +; CHECK-NEXT: br label [[TMP40]] +; CHECK: 40: +; CHECK-NEXT: [[TMP41]] = add nsw i32 [[TMP19]], 1 +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 42: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP11]]) +; CHECK-NEXT: br label [[TMP43:%.*]] +; CHECK: 43: +; CHECK-NEXT: ret void +; + %6 = alloca ptr, align 8 + %7 = alloca ptr, align 8 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + %10 = alloca i32, align 4 + %11 = alloca i32, align 4 + %12 = alloca i32, align 4 + store ptr %0, ptr %6, align 8 + store ptr %1, ptr %7, align 8 + store i32 %2, ptr %8, align 4 + store i32 %3, ptr %9, align 4 + store i32 %4, ptr %10, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %11) + store i32 0, ptr %11, align 4 + br label %13 + +13: ; preds = %41, %5 + %14 = load i32, ptr %11, align 4 + %15 = load i32, ptr %8, align 4 + %16 = icmp slt i32 %14, %15 + br i1 %16, label %18, label %17 + +17: ; preds = %13 + store i32 2, ptr %12, align 4 + br label %44 + +18: ; preds = %13 + %19 = load ptr, ptr %6, align 8 + %20 = load i32, ptr %11, align 4 + %21 = sext i32 %20 to i64 + %22 = getelementptr inbounds i32, ptr %19, i64 %21 + %23 = load i32, ptr %22, align 4 + %24 = icmp ne i32 %23, 0 + br i1 %24, label %25, label %28 + +25: ; preds = %18 + %26 = load i32, ptr %9, align 4 + %27 = add nsw i32 %26, 1 + store i32 %27, ptr %9, align 4 + store i32 2, ptr %12, 
align 4 + br label %44 + +28: ; preds = %18 + %29 = load i32, ptr %9, align 4 + %30 = add nsw i32 %29, 1 + store i32 %30, ptr %9, align 4 + %31 = load ptr, ptr %7, align 8 + %32 = load i32, ptr %11, align 4 + %33 = sext i32 %32 to i64 + %34 = getelementptr inbounds i32, ptr %31, i64 %33 + %35 = load i32, ptr %34, align 4 + %36 = icmp ne i32 %35, 0 + br i1 %36, label %37, label %40 + +37: ; preds = %28 + %38 = load i32, ptr %10, align 4 + %39 = add nsw i32 %38, 1 + store i32 %39, ptr %10, align 4 + br label %40 + +40: ; preds = %37, %28 + br label %41 + +41: ; preds = %40 + %42 = load i32, ptr %11, align 4 + %43 = add nsw i32 %42, 1 + store i32 %43, ptr %11, align 4 + br label %13 + +44: ; preds = %25, %17 + call void @llvm.lifetime.end.p0(i64 4, ptr %11) + br label %45 + +45: ; preds = %44 + ret void +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) Index: llvm/test/Transforms/LICM/promote-sink-store-capture.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-capture.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s + +; Function Attrs: nounwind uwtable +define dso_local void @f(ptr noalias noundef %0, i32 noundef %1, i32 noundef %2) { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP4:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8 +; CHECK-NEXT: store i32 
[[TMP1:%.*]], ptr [[TMP5]], align 4 +; CHECK-NEXT: store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP7]]) +; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[DOTPROMOTED2:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: br label [[TMP13:%.*]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP30:%.*]], [[TMP32:%.*]] ], [ [[DOTPROMOTED2]], [[TMP3:%.*]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP33:%.*]], [[TMP32]] ], [ [[DOTPROMOTED]], [[TMP3]] ] +; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP10]] +; CHECK-NEXT: br i1 [[TMP16]], label [[TMP18:%.*]], label [[TMP17:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[DOTLCSSA3:%.*]] = phi i32 [ [[TMP14]], [[TMP13]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP15]], [[TMP13]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA3]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 2, ptr [[TMP8]], align 4 +; CHECK-NEXT: br label [[TMP34:%.*]] +; CHECK: 18: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP9]]) +; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP26:%.*]] +; CHECK: 23: +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP25:%.*]] = add nsw i32 [[TMP24]], 1 +; CHECK-NEXT: store i32 [[TMP25]], ptr [[TMP9]], align 4 +; CHECK-NEXT: br label 
[[TMP29:%.*]] +; CHECK: 26: +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add nsw i32 [[TMP27]], 1 +; CHECK-NEXT: store i32 [[TMP28]], ptr [[TMP9]], align 4 +; CHECK-NEXT: br label [[TMP29]] +; CHECK: 29: +; CHECK-NEXT: [[TMP30]] = phi i32 [ 0, [[TMP26]] ], [ 2, [[TMP23]] ] +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP9]]) +; CHECK-NEXT: switch i32 [[TMP30]], label [[DOTLOOPEXIT:%.*]] [ +; CHECK-NEXT: i32 0, label [[TMP31:%.*]] +; CHECK-NEXT: ] +; CHECK: 31: +; CHECK-NEXT: br label [[TMP32]] +; CHECK: 32: +; CHECK-NEXT: [[TMP33]] = add nsw i32 [[TMP15]], 1 +; CHECK-NEXT: br label [[TMP13]] +; CHECK: .loopexit: +; CHECK-NEXT: [[DOTLCSSA4:%.*]] = phi i32 [ [[TMP30]], [[TMP29]] ] +; CHECK-NEXT: [[DOTLCSSA1:%.*]] = phi i32 [ [[TMP15]], [[TMP29]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA1]], ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA4]], ptr [[TMP8]], align 4 +; CHECK-NEXT: br label [[TMP34]] +; CHECK: 34: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP7]]) +; CHECK-NEXT: br label [[TMP35:%.*]] +; CHECK: 35: +; CHECK-NEXT: ret void +; + %4 = alloca ptr, align 8 + %5 = alloca i32, align 4 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store i32 %1, ptr %5, align 4 + store i32 %2, ptr %6, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %7) + store i32 0, ptr %7, align 4 + br label %10 + +10: ; preds = %32, %3 + %11 = load i32, ptr %7, align 4 + %12 = load i32, ptr %5, align 4 + %13 = icmp slt i32 %11, %12 + br i1 %13, label %15, label %14 + +14: ; preds = %10 + store i32 2, ptr %8, align 4 + br label %35 + +15: ; preds = %10 + call void @llvm.lifetime.start.p0(i64 4, ptr %9) + %16 = load i32, ptr %6, align 4 + store i32 %16, ptr %9, align 4 + %17 = load ptr, ptr %4, align 8 + %18 = load i32, ptr %7, align 4 + %19 = sext i32 %18 to i64 + %20 = getelementptr inbounds i32, ptr %17, 
i64 %19 + %21 = load i32, ptr %20, align 4 + %22 = icmp ne i32 %21, 0 + br i1 %22, label %23, label %26 + +23: ; preds = %15 + %24 = load i32, ptr %9, align 4 + %25 = add nsw i32 %24, 1 + store i32 %25, ptr %9, align 4 + store i32 2, ptr %8, align 4 + br label %29 + +26: ; preds = %15 + %27 = load i32, ptr %9, align 4 + %28 = add nsw i32 %27, 1 + store i32 %28, ptr %9, align 4 + store i32 0, ptr %8, align 4 + br label %29 + +29: ; preds = %26, %23 + call void @llvm.lifetime.end.p0(i64 4, ptr %9) + %30 = load i32, ptr %8, align 4 + switch i32 %30, label %35 [ + i32 0, label %31 + ] + +31: ; preds = %29 + br label %32 + +32: ; preds = %31 + %33 = load i32, ptr %7, align 4 + %34 = add nsw i32 %33, 1 + store i32 %34, ptr %7, align 4 + br label %10 + +35: ; preds = %29, %14 + call void @llvm.lifetime.end.p0(i64 4, ptr %7) + br label %36 + +36: ; preds = %35 + ret void +} + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) + +; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) Index: llvm/test/Transforms/LICM/promote-sink-store-constant-global.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store-constant-global.ll @@ -0,0 +1,149 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s + +@u = dso_local constant i32 7, align 4 +@v = dso_local constant i32 11, align 4 + +; Function Attrs: nounwind uwtable +define dso_local void @f(ptr noalias noundef %0, ptr noalias noundef %1, i32 noundef %2) { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP4:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = alloca i32, align 4 +; CHECK-NEXT: 
[[TMP8:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[TMP9:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8 +; CHECK-NEXT: store ptr [[TMP1:%.*]], ptr [[TMP5]], align 8 +; CHECK-NEXT: store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP7]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP8]]) +; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NEXT: [[DOTPROMOTED2:%.*]] = load i32, ptr [[TMP7]], align 1 +; CHECK-NEXT: br label [[TMP13:%.*]] +; CHECK: 13: +; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ [[TMP31:%.*]], [[TMP32:%.*]] ], [ [[DOTPROMOTED2]], [[TMP3:%.*]] ] +; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ [[TMP33:%.*]], [[TMP32]] ], [ [[DOTPROMOTED]], [[TMP3]] ] +; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[TMP15]], [[TMP10]] +; CHECK-NEXT: br i1 [[TMP16]], label [[TMP18:%.*]], label [[TMP17:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[DOTLCSSA3:%.*]] = phi i32 [ [[TMP14]], [[TMP13]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP15]], [[TMP13]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA3]], ptr [[TMP7]], align 1 +; CHECK-NEXT: store i32 2, ptr [[TMP9]], align 4 +; CHECK-NEXT: br label [[TMP34:%.*]] +; CHECK: 18: +; CHECK-NEXT: [[TMP19:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP11]], i64 [[TMP19]] +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i32 [[TMP21]], 0 +; CHECK-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP24:%.*]] +; CHECK: 23: +; CHECK-NEXT: [[DOTLCSSA4:%.*]] = phi i32 [ [[TMP14]], [[TMP18]] ] +; CHECK-NEXT: 
[[DOTLCSSA1:%.*]] = phi i32 [ [[TMP15]], [[TMP18]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA1]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store i32 [[DOTLCSSA4]], ptr [[TMP7]], align 1 +; CHECK-NEXT: store i32 7, ptr [[TMP7]], align 4 +; CHECK-NEXT: store i32 2, ptr [[TMP9]], align 4 +; CHECK-NEXT: br label [[TMP34]] +; CHECK: 24: +; CHECK-NEXT: [[TMP25:%.*]] = sext i32 [[TMP15]] to i64 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4 +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[TMP28]], label [[TMP29:%.*]], label [[TMP30:%.*]] +; CHECK: 29: +; CHECK-NEXT: br label [[TMP30]] +; CHECK: 30: +; CHECK-NEXT: [[TMP31]] = phi i32 [ 11, [[TMP29]] ], [ 7, [[TMP24]] ] +; CHECK-NEXT: br label [[TMP32]] +; CHECK: 32: +; CHECK-NEXT: [[TMP33]] = add nsw i32 [[TMP15]], 1 +; CHECK-NEXT: br label [[TMP13]] +; CHECK: 34: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP8]]) +; CHECK-NEXT: br label [[TMP35:%.*]] +; CHECK: 35: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP7]]) +; CHECK-NEXT: ret void +; + %4 = alloca ptr, align 8 + %5 = alloca ptr, align 8 + %6 = alloca i32, align 4 + %7 = alloca i32, align 4 + %8 = alloca i32, align 4 + %9 = alloca i32, align 4 + store ptr %0, ptr %4, align 8 + store ptr %1, ptr %5, align 8 + store i32 %2, ptr %6, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %7) + call void @llvm.lifetime.start.p0(i64 4, ptr %8) + store i32 0, ptr %8, align 4 + br label %10 + +10: ; preds = %32, %3 + %11 = load i32, ptr %8, align 4 + %12 = load i32, ptr %6, align 4 + %13 = icmp slt i32 %11, %12 + br i1 %13, label %15, label %14 + +14: ; preds = %10 + store i32 2, ptr %9, align 4 + br label %35 + +15: ; preds = %10 + %16 = load ptr, ptr %4, align 8 + %17 = load i32, ptr %8, align 4 + %18 = sext i32 %17 to i64 + %19 = getelementptr inbounds i32, ptr %16, i64 %18 + %20 = load i32, ptr %19, align 4 + %21 
= icmp ne i32 %20, 0
+  br i1 %21, label %22, label %23
+
+22:                                               ; preds = %15
+  store i32 7, ptr %7, align 4
+  store i32 2, ptr %9, align 4
+  br label %35
+
+23:                                               ; preds = %15
+  store i32 7, ptr %7, align 4
+  %24 = load ptr, ptr %5, align 8
+  %25 = load i32, ptr %8, align 4
+  %26 = sext i32 %25 to i64
+  %27 = getelementptr inbounds i32, ptr %24, i64 %26
+  %28 = load i32, ptr %27, align 4
+  %29 = icmp ne i32 %28, 0
+  br i1 %29, label %30, label %31
+
+30:                                               ; preds = %23
+  store i32 11, ptr %7, align 4
+  br label %31
+
+31:                                               ; preds = %30, %23
+  br label %32
+
+32:                                               ; preds = %31
+  %33 = load i32, ptr %8, align 4
+  %34 = add nsw i32 %33, 1
+  store i32 %34, ptr %8, align 4
+  br label %10
+
+35:                                               ; preds = %22, %14
+  call void @llvm.lifetime.end.p0(i64 4, ptr %8)
+  br label %36
+
+36:                                               ; preds = %35
+  call void @llvm.lifetime.end.p0(i64 4, ptr %7)
+  ret void
+}
+
+; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
Index: llvm/test/Transforms/LICM/promote-sink-store-global.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/promote-sink-store-global.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s
+; With the thread model forced to single, LICM promotes the loop-body stores
+; to the globals @u and @v: both are loaded once before the loop, carried in
+; PHIs, and stored back once in the exit block.
+
+@u = dso_local global i32 0, align 4
+@v = dso_local global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define dso_local void @f(ptr noalias noundef %0, ptr noalias noundef %1, i32 noundef %2) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:    [[TMP4:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP5:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[TMP0:%.*]], ptr [[TMP4]], align 8
+; CHECK-NEXT:    store ptr [[TMP1:%.*]], ptr [[TMP5]], align 8
+; CHECK-NEXT:    store i32 [[TMP2:%.*]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(i64 4, ptr [[TMP7]])
+; CHECK-NEXT:    store i32 0, ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[DOTPROMOTED:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-NEXT:    [[U_PROMOTED:%.*]] = load i32, ptr @u, align 4
+; CHECK-NEXT:    [[V_PROMOTED:%.*]] = load i32, ptr @v, align 4
+; CHECK-NEXT:    br label [[TMP9:%.*]]
+; CHECK:       9:
+; CHECK-NEXT:    [[TMP10:%.*]] = phi i32 [ [[TMP17:%.*]], [[TMP18:%.*]] ], [ [[V_PROMOTED]], [[TMP3:%.*]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi i32 [ [[TMP16:%.*]], [[TMP18]] ], [ [[U_PROMOTED]], [[TMP3]] ]
+; CHECK-NEXT:    [[TMP12:%.*]] = phi i32 [ [[TMP19:%.*]], [[TMP18]] ], [ [[DOTPROMOTED]], [[TMP3]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[TMP12]], [[TMP8]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[TMP15:%.*]], label [[TMP14:%.*]]
+; CHECK:       14:
+; CHECK-NEXT:    [[DOTLCSSA2:%.*]] = phi i32 [ [[TMP10]], [[TMP9]] ]
+; CHECK-NEXT:    [[DOTLCSSA1:%.*]] = phi i32 [ [[TMP11]], [[TMP9]] ]
+; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[TMP9]] ]
+; CHECK-NEXT:    store i32 [[DOTLCSSA]], ptr [[TMP7]], align 4
+; CHECK-NEXT:    store i32 [[DOTLCSSA1]], ptr @u, align 4
+; CHECK-NEXT:    store i32 [[DOTLCSSA2]], ptr @v, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 4, ptr [[TMP7]])
+; CHECK-NEXT:    br label [[TMP20:%.*]]
+; CHECK:       15:
+; CHECK-NEXT:    [[TMP16]] = add nsw i32 [[TMP11]], 1
+; CHECK-NEXT:    [[TMP17]] = add nsw i32 [[TMP10]], 1
+; CHECK-NEXT:    br label [[TMP18]]
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19]] = add nsw i32 [[TMP12]], 1
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       20:
+; CHECK-NEXT:    ret void
+;
+  %4 = alloca ptr, align 8
+  %5 = alloca ptr, align 8
+  %6 = alloca i32, align 4
+  %7 = alloca i32, align 4
+  store ptr %0, ptr %4, align 8
+  store ptr %1, ptr %5, align 8
+  store i32 %2, ptr %6, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr %7)
+  store i32 0, ptr %7, align 4
+  br label %8
+
+8:                                                ; preds = %18, %3
+  %9 = load i32, ptr %7, align 4
+  %10 = load i32, ptr %6, align 4
+  %11 = icmp slt i32 %9, %10
+  br i1 %11, label %13, label %12
+
+12:                                               ; preds = %8
+  call void @llvm.lifetime.end.p0(i64 4, ptr %7)
+  br label %21
+
+13:                                               ; preds = %8
+  %14 = load i32, ptr @u, align 4
+  %15 = add nsw i32 %14, 1
+  store i32 %15, ptr @u, align 4
+  %16 = load i32, ptr @v, align 4
+  %17 = add nsw i32 %16, 1
+  store i32 %17, ptr @v, align 4
+  br label %18
+
+18:                                               ; preds = %13
+  %19 = load i32, ptr %7, align 4
+  %20 = add nsw i32 %19, 1
+  store i32 %20, ptr %7, align 4
+  br label %8
+
+21:                                               ; preds = %12
+  ret void
+}
+
+; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
+
+; Function Attrs: argmemonly nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
Index: llvm/test/Transforms/LICM/promote-sink-store.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LICM/promote-sink-store.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s --check-prefixes FTMS
+; RUN: opt -licm -S %s | FileCheck %s --check-prefixes NFTMS
+; The store to @v in bb19 is conditional. With the thread model forced to
+; single (FTMS) LICM promotes it and stores @v once in the exit block bb25;
+; without the flag (NFTMS) the store stays in bb19.
+
+@u = dso_local local_unnamed_addr global i32 0, align 4
+@v = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local void @f(ptr noalias nocapture noundef readonly %arg, ptr noalias nocapture noundef readonly %arg1, i32 noundef %arg2) local_unnamed_addr {
+; FTMS-LABEL: @f(
+; FTMS-NEXT:  bb:
+; FTMS-NEXT:    [[I:%.*]] = icmp sgt i32 [[ARG2:%.*]], 0
+; FTMS-NEXT:    br i1 [[I]], label [[BB3:%.*]], label [[BB26:%.*]]
+; FTMS:       bb3:
+; FTMS-NEXT:    [[I4:%.*]] = load i32, ptr @v, align 4
+; FTMS-NEXT:    [[I5:%.*]] = load i32, ptr @u, align 4
+; FTMS-NEXT:    [[I6:%.*]] = zext i32 [[ARG2]] to i64
+; FTMS-NEXT:    [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1
+; FTMS-NEXT:    br label [[BB7:%.*]]
+; FTMS:       bb7:
+; FTMS-NEXT:    [[I203:%.*]] = phi i32 [ [[V_PROMOTED]], [[BB3]] ], [ [[I202:%.*]], [[BB21:%.*]] ]
+; FTMS-NEXT:    [[I8:%.*]] = phi i64 [ 0, [[BB3]] ], [ [[I23:%.*]], [[BB21]] ]
+; FTMS-NEXT:    [[I9:%.*]] = phi i32 [ [[I5]], [[BB3]] ], [ [[I14:%.*]], [[BB21]] ]
+; FTMS-NEXT:    [[I10:%.*]] = phi i32 [ [[I4]], [[BB3]] ], [ [[I22:%.*]], [[BB21]] ]
+; FTMS-NEXT:    [[I11:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[I8]]
+; FTMS-NEXT:    [[I12:%.*]] = load i32, ptr [[I11]], align 4
+; FTMS-NEXT:    [[I13:%.*]] = icmp eq i32 [[I12]], 0
+; FTMS-NEXT:    [[I14]] = add nsw i32 [[I9]], 1
+; FTMS-NEXT:    br i1 [[I13]], label [[BB15:%.*]], label [[BB25:%.*]]
+; FTMS:       bb15:
+; FTMS-NEXT:    [[I16:%.*]] = getelementptr inbounds i32, ptr [[ARG1:%.*]], i64 [[I8]]
+; FTMS-NEXT:    [[I17:%.*]] = load i32, ptr [[I16]], align 4
+; FTMS-NEXT:    [[I18:%.*]] = icmp eq i32 [[I17]], 0
+; FTMS-NEXT:    br i1 [[I18]], label [[BB21]], label [[BB19:%.*]]
+; FTMS:       bb19:
+; FTMS-NEXT:    [[I20:%.*]] = add nsw i32 [[I10]], 1
+; FTMS-NEXT:    br label [[BB21]]
+; FTMS:       bb21:
+; FTMS-NEXT:    [[I202]] = phi i32 [ [[I203]], [[BB15]] ], [ [[I20]], [[BB19]] ]
+; FTMS-NEXT:    [[I22]] = phi i32 [ [[I10]], [[BB15]] ], [ [[I20]], [[BB19]] ]
+; FTMS-NEXT:    [[I23]] = add nuw nsw i64 [[I8]], 1
+; FTMS-NEXT:    [[I24:%.*]] = icmp eq i64 [[I23]], [[I6]]
+; FTMS-NEXT:    br i1 [[I24]], label [[BB25]], label [[BB7]]
+; FTMS:       bb25:
+; FTMS-NEXT:    [[I201:%.*]] = phi i32 [ [[I202]], [[BB21]] ], [ [[I203]], [[BB7]] ]
+; FTMS-NEXT:    [[I14_LCSSA:%.*]] = phi i32 [ [[I14]], [[BB21]] ], [ [[I14]], [[BB7]] ]
+; FTMS-NEXT:    store i32 [[I201]], ptr @v, align 1
+; FTMS-NEXT:    store i32 [[I14_LCSSA]], ptr @u, align 4
+; FTMS-NEXT:    br label [[BB26]]
+; FTMS:       bb26:
+; FTMS-NEXT:    ret void
+;
+; NFTMS-LABEL: @f(
+; NFTMS-NEXT:  bb:
+; NFTMS-NEXT:    [[I:%.*]] = icmp sgt i32 [[ARG2:%.*]], 0
+; NFTMS-NEXT:    br i1 [[I]], label [[BB3:%.*]], label [[BB26:%.*]]
+; NFTMS:       bb3:
+; NFTMS-NEXT:    [[I4:%.*]] = load i32, ptr @v, align 4
+; NFTMS-NEXT:    [[I5:%.*]] = load i32, ptr @u, align 4
+; NFTMS-NEXT:    [[I6:%.*]] = zext i32 [[ARG2]] to i64
+; NFTMS-NEXT:    br label [[BB7:%.*]]
+; NFTMS:       bb7:
+; NFTMS-NEXT:    [[I8:%.*]] = phi i64 [ 0, [[BB3]] ], [ [[I23:%.*]], [[BB21:%.*]] ]
+; NFTMS-NEXT:    [[I9:%.*]] = phi i32 [ [[I5]], [[BB3]] ], [ [[I14:%.*]], [[BB21]] ]
+; NFTMS-NEXT:    [[I10:%.*]] = phi i32 [ [[I4]], [[BB3]] ], [ [[I22:%.*]], [[BB21]] ]
+; NFTMS-NEXT:    [[I11:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[I8]]
+; NFTMS-NEXT:    [[I12:%.*]] = load i32, ptr [[I11]], align 4
+; NFTMS-NEXT:    [[I13:%.*]] = icmp eq i32 [[I12]], 0
+; NFTMS-NEXT:    [[I14]] = add nsw i32 [[I9]], 1
+; NFTMS-NEXT:    br i1 [[I13]], label [[BB15:%.*]], label [[BB25:%.*]]
+; NFTMS:       bb15:
+; NFTMS-NEXT:    [[I16:%.*]] = getelementptr inbounds i32, ptr [[ARG1:%.*]], i64 [[I8]]
+; NFTMS-NEXT:    [[I17:%.*]] = load i32, ptr [[I16]], align 4
+; NFTMS-NEXT:    [[I18:%.*]] = icmp eq i32 [[I17]], 0
+; NFTMS-NEXT:    br i1 [[I18]], label [[BB21]], label [[BB19:%.*]]
+; NFTMS:       bb19:
+; NFTMS-NEXT:    [[I20:%.*]] = add nsw i32 [[I10]], 1
+; NFTMS-NEXT:    store i32 [[I20]], ptr @v, align 4
+; NFTMS-NEXT:    br label [[BB21]]
+; NFTMS:       bb21:
+; NFTMS-NEXT:    [[I22]] = phi i32 [ [[I10]], [[BB15]] ], [ [[I20]], [[BB19]] ]
+; NFTMS-NEXT:    [[I23]] = add nuw nsw i64 [[I8]], 1
+; NFTMS-NEXT:    [[I24:%.*]] = icmp eq i64 [[I23]], [[I6]]
+; NFTMS-NEXT:    br i1 [[I24]], label [[BB25]], label [[BB7]]
+; NFTMS:       bb25:
+; NFTMS-NEXT:    [[I14_LCSSA:%.*]] = phi i32 [ [[I14]], [[BB21]] ], [ [[I14]], [[BB7]] ]
+; NFTMS-NEXT:    store i32 [[I14_LCSSA]], ptr @u, align 4
+; NFTMS-NEXT:    br label [[BB26]]
+; NFTMS:       bb26:
+; NFTMS-NEXT:    ret void
+;
+bb:
+  %i = icmp sgt i32 %arg2, 0
+  br i1 %i, label %bb3, label %bb26
+
+bb3:                                              ; preds = %bb
+  %i4 = load i32, ptr @v, align 4
+  %i5 = load i32, ptr @u, align 4
+  %i6 = zext i32 %arg2 to i64
+  br label %bb7
+
+bb7:                                              ; preds = %bb21, %bb3
+  %i8 = phi i64 [ 0, %bb3 ], [ %i23, %bb21 ]
+  %i9 = phi i32 [ %i5, %bb3 ], [ %i14, %bb21 ]
+  %i10 = phi i32 [ %i4, %bb3 ], [ %i22, %bb21 ]
+  %i11 = getelementptr inbounds i32, ptr %arg, i64 %i8
+  %i12 = load i32, ptr %i11, align 4
+  %i13 = icmp eq i32 %i12, 0
+  %i14 = add nsw i32 %i9, 1
+  br i1 %i13, label %bb15, label %bb25
+
+bb15:                                             ; preds = %bb7
+  %i16 = getelementptr inbounds i32, ptr %arg1, i64 %i8
+  %i17 = load i32, ptr %i16, align 4
+  %i18 = icmp eq i32 %i17, 0
+  br i1 %i18, label %bb21, label %bb19
+
+bb19:                                             ; preds = %bb15
+  %i20 = add nsw i32 %i10, 1
+  store i32 %i20, ptr @v, align 4
+  br label %bb21
+
+bb21:                                             ; preds = %bb19, %bb15
+  %i22 = phi i32 [ %i10, %bb15 ], [ %i20, %bb19 ]
+  %i23 = add nuw nsw i64 %i8, 1
+  %i24 = icmp eq i64 %i23, %i6
+  br i1 %i24, label %bb25, label %bb7
+
+bb25:                                             ; preds = %bb21, %bb7
+  store i32 %i14, ptr @u, align 4
+  br label %bb26
+
+bb26:                                             ; preds = %bb25, %bb
+  ret void
+}