Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -372,6 +372,8 @@ unsigned getAssumedAddrSpace(const Value *V) const; + bool isSingleThreaded() const; + std::pair getPredicatedAddrSpace(const Value *V) const; @@ -1577,6 +1579,7 @@ virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; + virtual bool isSingleThreaded() const = 0; virtual std::pair getPredicatedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, @@ -1956,6 +1959,8 @@ return Impl.getAssumedAddrSpace(V); } + bool isSingleThreaded() const override { return Impl.isSingleThreaded(); } + std::pair getPredicatedAddrSpace(const Value *V) const override { return Impl.getPredicatedAddrSpace(V); Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -108,6 +108,8 @@ unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + bool isSingleThreaded() const { return false; } + std::pair getPredicatedAddrSpace(const Value *V) const { return std::make_pair(nullptr, -1); Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -47,6 +47,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include #include #include @@ -287,6 +288,11 @@ return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + bool isSingleThreaded() const { + return getTLI()->getTargetMachine().Options.ThreadModel == + ThreadModel::Single; + } + std::pair getPredicatedAddrSpace(const Value *V) const { return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -206,11 +206,12 @@ /// \p AllowSpeculation is whether values should be hoisted even if they are not /// guaranteed to execute in the loop, but are safe to speculatively execute. bool promoteLoopAccessesToScalars( - const SmallSetVector &, SmallVectorImpl &, - SmallVectorImpl &, SmallVectorImpl &, - PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, - Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *, bool AllowSpeculation); + AAResults *, const SmallSetVector &, + SmallVectorImpl &, SmallVectorImpl &, + SmallVectorImpl &, PredIteratorCache &, LoopInfo *, + DominatorTree *, const TargetLibraryInfo *, TargetTransformInfo *, Loop *, + MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, + bool AllowSpeculation); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -273,6 +273,10 @@ return TTIImpl->getAssumedAddrSpace(V); } +bool TargetTransformInfo::isSingleThreaded() const { + return TTIImpl->isSingleThreaded(); +} + std::pair TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const { return TTIImpl->getPredicatedAddrSpace(V); Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -75,6 +75,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -109,6 +110,10 @@ "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); +static cl::opt SingleThread("licm-force-thread-model-single", cl::Hidden, + cl::init(false), + cl::desc("Allow data races in LICM pass")); + static cl::opt MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -480,8 +485,9 @@ for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); + AA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, + LI, DT, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, + LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -1900,17 +1906,21 @@ if (auto *A = dyn_cast(Object)) return A->hasByValAttr(); + if (auto *G = dyn_cast(Object)) + return !G->isConstant(); + // TODO: Noalias has nothing to do with writability, this should check for // an allocator function. return isNoAliasCall(Object); } -bool isThreadLocalObject(const Value *Object, const Loop *L, - DominatorTree *DT) { +bool isThreadLocalObject(const Value *Object, const Loop *L, DominatorTree *DT, + TargetTransformInfo *TTI) { // The object must be function-local to start with, and then not captured // before/in the loop. - return isIdentifiedFunctionLocal(Object) && - isNotCapturedBeforeOrInLoop(Object, L, DT); + return (isIdentifiedFunctionLocal(Object) && + isNotCapturedBeforeOrInLoop(Object, L, DT)) || + (TTI->isSingleThreaded() || SingleThread); } } // namespace @@ -1921,13 +1931,14 @@ /// loop invariant. /// bool llvm::promoteLoopAccessesToScalars( - const SmallSetVector &PointerMustAliases, + AAResults *AA, const SmallSetVector &PointerMustAliases, SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { + TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater &MSSAU, + ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, + bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -2137,7 +2148,8 @@ // violating the memory model. if (StoreSafety == StoreSafetyUnknown) { Value *Object = getUnderlyingObject(SomePtr); - if (isWritableObject(Object) && isThreadLocalObject(Object, CurLoop, DT)) + if (isWritableObject(Object) && + isThreadLocalObject(Object, CurLoop, DT, TTI)) StoreSafety = StoreSafe; } Index: llvm/test/Transforms/LICM/promote-sink-store.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store.ll @@ -0,0 +1,94 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local void @f(ptr noalias nocapture noundef readonly %arg, ptr noalias nocapture noundef readonly %arg1, i32 noundef %arg2) local_unnamed_addr { +; CHECK-LABEL: @f( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[I:%.*]] = icmp sgt i32 [[ARG2:%.*]], 0 +; CHECK-NEXT: br i1 [[I]], label [[BB3:%.*]], label [[BB26:%.*]] +; CHECK: bb3: +; CHECK-NEXT: [[I4:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[I5:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[I6:%.*]] = zext i32 [[ARG2]] to i64 +; CHECK-NEXT: [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1 +; CHECK-NEXT: br label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: [[I203:%.*]] = phi i32 [ [[V_PROMOTED]], [[BB3]] ], [ [[I202:%.*]], [[BB21:%.*]] ] +; CHECK-NEXT: [[I8:%.*]] = phi i64 [ 0, [[BB3]] ], [ [[I23:%.*]], [[BB21]] ] +; CHECK-NEXT: [[I9:%.*]] = phi i32 [ [[I5]], [[BB3]] ], [ [[I14:%.*]], [[BB21]] ] +; CHECK-NEXT: [[I10:%.*]] = phi i32 [ [[I4]], [[BB3]] ], [ [[I22:%.*]], [[BB21]] ] +; CHECK-NEXT: [[I11:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[I8]] +; CHECK-NEXT: [[I12:%.*]] = load i32, ptr [[I11]], align 4 +; CHECK-NEXT: [[I13:%.*]] = icmp eq i32 [[I12]], 0 +; CHECK-NEXT: [[I14]] = add nsw i32 [[I9]], 1 +; CHECK-NEXT: br i1 [[I13]], label [[BB15:%.*]], label [[BB25:%.*]] +; CHECK: bb15: +; CHECK-NEXT: [[I16:%.*]] = getelementptr inbounds i32, ptr [[ARG1:%.*]], i64 [[I8]] +; CHECK-NEXT: [[I17:%.*]] = load i32, ptr [[I16]], align 4 +; CHECK-NEXT: [[I18:%.*]] = icmp eq i32 [[I17]], 0 +; CHECK-NEXT: br i1 [[I18]], label [[BB21]], label [[BB19:%.*]] +; CHECK: bb19: +; CHECK-NEXT: [[I20:%.*]] = add nsw i32 [[I10]], 1 +; CHECK-NEXT: br label [[BB21]] +; CHECK: bb21: +; CHECK-NEXT: [[I202]] = phi i32 [ [[I203]], [[BB15]] ], [ [[I20]], [[BB19]] ] +; CHECK-NEXT: [[I22]] = phi i32 [ [[I10]], [[BB15]] ], [ [[I20]], [[BB19]] ] +; CHECK-NEXT: [[I23]] = add nuw nsw i64 [[I8]], 1 +; CHECK-NEXT: [[I24:%.*]] = icmp eq i64 [[I23]], [[I6]] +; CHECK-NEXT: br i1 [[I24]], label [[BB25]], label [[BB7]] +; CHECK: bb25: +; CHECK-NEXT: [[I201:%.*]] = phi i32 [ [[I202]], [[BB21]] ], [ [[I203]], [[BB7]] ] +; CHECK-NEXT: [[I14_LCSSA:%.*]] = phi i32 [ [[I14]], [[BB21]] ], [ [[I14]], [[BB7]] ] +; CHECK-NEXT: store i32 [[I201]], ptr @v, align 1 +; CHECK-NEXT: store i32 [[I14_LCSSA]], ptr @u, align 4 +; CHECK-NEXT: br label [[BB26]] +; CHECK: bb26: +; CHECK-NEXT: ret void +; +bb: + %i = icmp sgt i32 %arg2, 0 + br i1 %i, label %bb3, label %bb26 + +bb3: ; preds = %bb + %i4 = load i32, ptr @v, align 4 + %i5 = load i32, ptr @u, align 4 + %i6 = zext i32 %arg2 to i64 + br label %bb7 + +bb7: ; preds = %bb21, %bb3 + %i8 = phi i64 [ 0, %bb3 ], [ %i23, %bb21 ] + %i9 = phi i32 [ %i5, %bb3 ], [ %i14, %bb21 ] + %i10 = phi i32 [ %i4, %bb3 ], [ %i22, %bb21 ] + %i11 = getelementptr inbounds i32, ptr %arg, i64 %i8 + %i12 = load i32, ptr %i11, align 4 + %i13 = icmp eq i32 %i12, 0 + %i14 = add nsw i32 %i9, 1 + br i1 %i13, label %bb15, label %bb25 + +bb15: ; preds = %bb7 + %i16 = getelementptr inbounds i32, ptr %arg1, i64 %i8 + %i17 = load i32, ptr %i16, align 4 + %i18 = icmp eq i32 %i17, 0 + br i1 %i18, label %bb21, label %bb19 + +bb19: ; preds = %bb15 + %i20 = add nsw i32 %i10, 1 + store i32 %i20, ptr @v, align 4 + br label %bb21 + +bb21: ; preds = %bb19, %bb15 + %i22 = phi i32 [ %i10, %bb15 ], [ %i20, %bb19 ] + %i23 = add nuw nsw i64 %i8, 1 + %i24 = icmp eq i64 %i23, %i6 + br i1 %i24, label %bb25, label %bb7 + +bb25: ; preds = %bb21, %bb7 + store i32 %i14, ptr @u, align 4 + br label %bb26 + +bb26: ; preds = %bb25, %bb + ret void +} Index: llvm/test/Transforms/LICM/without-force-thread-model-single.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/without-force-thread-model-single.ll @@ -0,0 +1,90 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +define dso_local void @f(ptr noalias nocapture noundef readonly %arg, ptr noalias nocapture noundef readonly %arg1, i32 noundef %arg2) local_unnamed_addr { +; CHECK-LABEL: @f( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[I:%.*]] = icmp sgt i32 [[ARG2:%.*]], 0 +; CHECK-NEXT: br i1 [[I]], label [[BB3:%.*]], label [[BB26:%.*]] +; CHECK: bb3: +; CHECK-NEXT: [[I4:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[I5:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[I6:%.*]] = zext i32 [[ARG2]] to i64 +; CHECK-NEXT: br label [[BB7:%.*]] +; CHECK: bb7: +; CHECK-NEXT: [[I8:%.*]] = phi i64 [ 0, [[BB3]] ], [ [[I23:%.*]], [[BB21:%.*]] ] +; CHECK-NEXT: [[I9:%.*]] = phi i32 [ [[I5]], [[BB3]] ], [ [[I14:%.*]], [[BB21]] ] +; CHECK-NEXT: [[I10:%.*]] = phi i32 [ [[I4]], [[BB3]] ], [ [[I22:%.*]], [[BB21]] ] +; CHECK-NEXT: [[I11:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[I8]] +; CHECK-NEXT: [[I12:%.*]] = load i32, ptr [[I11]], align 4 +; CHECK-NEXT: [[I13:%.*]] = icmp eq i32 [[I12]], 0 +; CHECK-NEXT: [[I14]] = add nsw i32 [[I9]], 1 +; CHECK-NEXT: br i1 [[I13]], label [[BB15:%.*]], label [[BB25:%.*]] +; CHECK: bb15: +; CHECK-NEXT: [[I16:%.*]] = getelementptr inbounds i32, ptr [[ARG1:%.*]], i64 [[I8]] +; CHECK-NEXT: [[I17:%.*]] = load i32, ptr [[I16]], align 4 +; CHECK-NEXT: [[I18:%.*]] = icmp eq i32 [[I17]], 0 +; CHECK-NEXT: br i1 [[I18]], label [[BB21]], label [[BB19:%.*]] +; CHECK: bb19: +; CHECK-NEXT: [[I20:%.*]] = add nsw i32 [[I10]], 1 +; CHECK-NEXT: store i32 [[I20]], ptr @v, align 4 +; CHECK-NEXT: br label [[BB21]] +; CHECK: bb21: +; CHECK-NEXT: [[I22]] = phi i32 [ [[I10]], [[BB15]] ], [ [[I20]], [[BB19]] ] +; CHECK-NEXT: [[I23]] = add nuw nsw i64 [[I8]], 1 +; CHECK-NEXT: [[I24:%.*]] = icmp eq i64 [[I23]], [[I6]] +; CHECK-NEXT: br i1 [[I24]], label [[BB25]], label [[BB7]] +; CHECK: bb25: +; CHECK-NEXT: [[I14_LCSSA:%.*]] = phi i32 [ [[I14]], [[BB21]] ], [ [[I14]], [[BB7]] ] +; CHECK-NEXT: store i32 [[I14_LCSSA]], ptr @u, align 4 +; CHECK-NEXT: br label [[BB26]] +; CHECK: bb26: +; CHECK-NEXT: ret void +; +bb: + %i = icmp sgt i32 %arg2, 0 + br i1 %i, label %bb3, label %bb26 + +bb3: ; preds = %bb + %i4 = load i32, ptr @v, align 4 + %i5 = load i32, ptr @u, align 4 + %i6 = zext i32 %arg2 to i64 + br label %bb7 + +bb7: ; preds = %bb21, %bb3 + %i8 = phi i64 [ 0, %bb3 ], [ %i23, %bb21 ] + %i9 = phi i32 [ %i5, %bb3 ], [ %i14, %bb21 ] + %i10 = phi i32 [ %i4, %bb3 ], [ %i22, %bb21 ] + %i11 = getelementptr inbounds i32, ptr %arg, i64 %i8 + %i12 = load i32, ptr %i11, align 4 + %i13 = icmp eq i32 %i12, 0 + %i14 = add nsw i32 %i9, 1 + br i1 %i13, label %bb15, label %bb25 + +bb15: ; preds = %bb7 + %i16 = getelementptr inbounds i32, ptr %arg1, i64 %i8 + %i17 = load i32, ptr %i16, align 4 + %i18 = icmp eq i32 %i17, 0 + br i1 %i18, label %bb21, label %bb19 + +bb19: ; preds = %bb15 + %i20 = add nsw i32 %i10, 1 + store i32 %i20, ptr @v, align 4 + br label %bb21 + +bb21: ; preds = %bb19, %bb15 + %i22 = phi i32 [ %i10, %bb15 ], [ %i20, %bb19 ] + %i23 = add nuw nsw i64 %i8, 1 + %i24 = icmp eq i64 %i23, %i6 + br i1 %i24, label %bb25, label %bb7 + +bb25: ; preds = %bb21, %bb7 + store i32 %i14, ptr @u, align 4 + br label %bb26 + +bb26: ; preds = %bb25, %bb + ret void +}