Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -370,6 +370,8 @@ unsigned getAssumedAddrSpace(const Value *V) const; + bool isSingleThreaded() const; + std::pair getPredicatedAddrSpace(const Value *V) const; @@ -1542,6 +1544,7 @@ virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; + virtual bool isSingleThreaded() const = 0; virtual std::pair getPredicatedAddrSpace(const Value *V) const = 0; virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, @@ -1917,6 +1920,10 @@ return Impl.getAssumedAddrSpace(V); } + bool isSingleThreaded() const override { + return Impl.isSingleThreaded(); + } + std::pair getPredicatedAddrSpace(const Value *V) const override { return Impl.getPredicatedAddrSpace(V); Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -108,6 +108,8 @@ unsigned getAssumedAddrSpace(const Value *V) const { return -1; } + bool isSingleThreaded() const { return false; } + std::pair getPredicatedAddrSpace(const Value *V) const { return std::make_pair(nullptr, -1); Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -47,6 +47,7 @@ #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include #include #include @@ -287,6 +288,11 @@ return getTLI()->getTargetMachine().getAssumedAddrSpace(V); } + bool isSingleThreaded() const { + return getTLI()->getTargetMachine().Options.ThreadModel == + ThreadModel::Single; + } + std::pair getPredicatedAddrSpace(const Value *V) const { return getTLI()->getTargetMachine().getPredicatedAddrSpace(V); Index: llvm/include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -208,11 +208,12 @@ /// \p AllowSpeculation is whether values should be hoisted even if they are not /// guaranteed to execute in the loop, but are safe to speculatively execute. bool promoteLoopAccessesToScalars( - const SmallSetVector &, SmallVectorImpl &, - SmallVectorImpl &, SmallVectorImpl &, - PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *, - Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, - OptimizationRemarkEmitter *, bool AllowSpeculation); + AAResults *AA, const SmallSetVector &, + SmallVectorImpl &, SmallVectorImpl &, + SmallVectorImpl &, PredIteratorCache &, LoopInfo *, + DominatorTree *, const TargetLibraryInfo *, TargetTransformInfo *, Loop *, + MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, + bool AllowSpeculation); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -273,6 +273,10 @@ return TTIImpl->getAssumedAddrSpace(V); } +bool TargetTransformInfo::isSingleThreaded() const { + return TTIImpl->isSingleThreaded(); +} + std::pair TargetTransformInfo::getPredicatedAddrSpace(const Value *V) const { return TTIImpl->getPredicatedAddrSpace(V); Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -75,6 +75,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -109,6 +110,11 @@ "licm-control-flow-hoisting", cl::Hidden, cl::init(false), cl::desc("Enable control flow (and PHI) hoisting in LICM")); +static cl::opt + ThreadModelSingle("licm-force-thread-model-single", cl::Hidden, + cl::init(false), + cl::desc("Allow data races in LICM pass")); + static cl::opt MaxNumUsesTraversed( "licm-max-num-uses-traversed", cl::Hidden, cl::init(8), cl::desc("Max num uses visited for identifying load " @@ -486,8 +492,9 @@ for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, TLI, L, MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation); + AA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, + LI, DT, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, + LicmAllowSpeculation); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -1905,13 +1912,14 @@ /// loop invariant. /// bool llvm::promoteLoopAccessesToScalars( - const SmallSetVector &PointerMustAliases, + AAResults *AA, const SmallSetVector &PointerMustAliases, SmallVectorImpl &ExitBlocks, SmallVectorImpl &InsertPts, SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, - Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, - OptimizationRemarkEmitter *ORE, bool AllowSpeculation) { + TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater &MSSAU, + ICFLoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE, + bool AllowSpeculation) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && SafetyInfo != nullptr && @@ -1961,6 +1969,7 @@ bool SafeToInsertStore = false; bool StoreIsGuanteedToExecute = false; bool FoundLoadToPromote = false; + bool PointToConstantMemory = false; SmallVector LoopUses; @@ -2070,6 +2079,8 @@ Store->getPointerOperand(), Store->getValueOperand()->getType(), Store->getAlign(), MDL, Preheader->getTerminator(), DT, TLI); } + if (AA->pointsToConstantMemory(Store->getOperand(1))) + PointToConstantMemory = true; } else return false; // Not a load or store. @@ -2112,7 +2123,9 @@ // stores along paths which originally didn't have them without violating the // memory model. if (!SafeToInsertStore) { - if (IsKnownThreadLocalObject) + if (IsKnownThreadLocalObject || + ((TTI->isSingleThreaded() || ThreadModelSingle) && + !PointToConstantMemory)) SafeToInsertStore = true; else { Value *Object = getUnderlyingObject(SomePtr); Index: llvm/test/Transforms/LICM/promote-sink-store.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/promote-sink-store.ll @@ -0,0 +1,93 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -licm-force-thread-model-single -S %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nosync nounwind uwtable +define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP31:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: [[V_PROMOTED:%.*]] = load i32, ptr @v, align 1 +; CHECK-NEXT: br label [[TMP9:%.*]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = phi i32 [ [[V_PROMOTED]], [[TMP5]] ], [ [[TMP25:%.*]], [[TMP24:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP27:%.*]], [[TMP24]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP17:%.*]], [[TMP24]] ] +; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP26:%.*]], [[TMP24]] ] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17]] = add nsw i32 [[TMP12]], 1 +; CHECK-NEXT: br i1 [[TMP16]], label [[TMP18:%.*]], label [[TMP29:%.*]] +; CHECK: 18: +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[TMP20]], 0 +; CHECK-NEXT: br i1 [[TMP21]], label [[TMP24]], label [[TMP22:%.*]] +; CHECK: 22: +; CHECK-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP13]], 1 +; CHECK-NEXT: br label [[TMP24]] +; CHECK: 24: +; CHECK-NEXT: [[TMP25]] = phi i32 [ [[TMP10]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ] +; CHECK-NEXT: [[TMP26]] = phi i32 [ [[TMP13]], [[TMP18]] ], [ [[TMP23]], [[TMP22]] ] +; CHECK-NEXT: [[TMP27]] = add nuw nsw i64 [[TMP11]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[TMP27]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP28]], label [[TMP29]], label [[TMP9]] +; CHECK: 29: +; CHECK-NEXT: [[TMP30:%.*]] = phi i32 [ [[TMP25]], [[TMP24]] ], [ [[TMP10]], [[TMP9]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP17]], [[TMP24]] ], [ [[TMP17]], [[TMP9]] ] +; CHECK-NEXT: store i32 [[TMP30]], ptr @v, align 1 +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr @u, align 4 +; CHECK-NEXT: br label [[TMP31]] +; CHECK: 31: +; CHECK-NEXT: ret void +; + %4 = icmp sgt i32 %2, 0 + br i1 %4, label %5, label %28 + +5: ; preds = %3 + %6 = load i32, ptr @v, align 4 + %7 = load i32, ptr @u, align 4 + %8 = zext i32 %2 to i64 + br label %9 + +9: ; preds = %5, %23 + %10 = phi i64 [ 0, %5 ], [ %25, %23 ] + %11 = phi i32 [ %7, %5 ], [ %16, %23 ] + %12 = phi i32 [ %6, %5 ], [ %24, %23 ] + %13 = getelementptr inbounds i32, ptr %0, i64 %10 + %14 = load i32, ptr %13, align 4 + %15 = icmp eq i32 %14, 0 + %16 = add nsw i32 %11, 1 + br i1 %15, label %17, label %27 + +17: ; preds = %9 + %18 = getelementptr inbounds i32, ptr %1, i64 %10 + %19 = load i32, ptr %18, align 4 + %20 = icmp eq i32 %19, 0 + br i1 %20, label %23, label %21 + +21: ; preds = %17 + %22 = add nsw i32 %12, 1 + store i32 %22, ptr @v, align 4 + br label %23 + +23: ; preds = %17, %21 + %24 = phi i32 [ %12, %17 ], [ %22, %21 ] + %25 = add nuw nsw i64 %10, 1 + %26 = icmp eq i64 %25, %8 + br i1 %26, label %27, label %9 + +27: ; preds = %9, %23 + store i32 %16, ptr @u, align 4 + br label %28 + +28: ; preds = %27, %3 + ret void +} Index: llvm/test/Transforms/LICM/without-allow-data-race.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/without-allow-data-race.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S %s | FileCheck %s + +@u = dso_local local_unnamed_addr global i32 0, align 4 +@v = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree norecurse nosync nounwind uwtable +define dso_local void @f(ptr noalias nocapture noundef readonly %0, ptr noalias nocapture noundef readonly %1, i32 noundef %2) local_unnamed_addr { +; CHECK-LABEL: @f( +; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2:%.*]], 0 +; CHECK-NEXT: br i1 [[TMP4]], label [[TMP5:%.*]], label [[TMP28:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr @v, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr @u, align 4 +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: br label [[TMP9:%.*]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = phi i64 [ 0, [[TMP5]] ], [ [[TMP25:%.*]], [[TMP23:%.*]] ] +; CHECK-NEXT: [[TMP11:%.*]] = phi i32 [ [[TMP7]], [[TMP5]] ], [ [[TMP16:%.*]], [[TMP23]] ] +; CHECK-NEXT: [[TMP12:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP24:%.*]], [[TMP23]] ] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 +; CHECK-NEXT: [[TMP16]] = add nsw i32 [[TMP11]], 1 +; CHECK-NEXT: br i1 [[TMP15]], label [[TMP17:%.*]], label [[TMP27:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP1:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i32 [[TMP19]], 0 +; CHECK-NEXT: br i1 [[TMP20]], label [[TMP23]], label [[TMP21:%.*]] +; CHECK: 21: +; CHECK-NEXT: [[TMP22:%.*]] = add nsw i32 [[TMP12]], 1 +; CHECK-NEXT: store i32 [[TMP22]], ptr @v, align 4 +; CHECK-NEXT: br label [[TMP23]] +; CHECK: 23: +; CHECK-NEXT: [[TMP24]] = phi i32 [ [[TMP12]], [[TMP17]] ], [ [[TMP22]], [[TMP21]] ] +; CHECK-NEXT: [[TMP25]] = add nuw nsw i64 [[TMP10]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[TMP25]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP26]], label [[TMP27]], label [[TMP9]] +; CHECK: 27: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP16]], [[TMP23]] ], [ [[TMP16]], [[TMP9]] ] +; CHECK-NEXT: store i32 [[DOTLCSSA]], ptr @u, align 4 +; CHECK-NEXT: br label [[TMP28]] +; CHECK: 28: +; CHECK-NEXT: ret void +; + %4 = icmp sgt i32 %2, 0 + br i1 %4, label %5, label %28 + +5: ; preds = %3 + %6 = load i32, ptr @v, align 4 + %7 = load i32, ptr @u, align 4 + %8 = zext i32 %2 to i64 + br label %9 + +9: ; preds = %5, %23 + %10 = phi i64 [ 0, %5 ], [ %25, %23 ] + %11 = phi i32 [ %7, %5 ], [ %16, %23 ] + %12 = phi i32 [ %6, %5 ], [ %24, %23 ] + %13 = getelementptr inbounds i32, ptr %0, i64 %10 + %14 = load i32, ptr %13, align 4 + %15 = icmp eq i32 %14, 0 + %16 = add nsw i32 %11, 1 + br i1 %15, label %17, label %27 + +17: ; preds = %9 + %18 = getelementptr inbounds i32, ptr %1, i64 %10 + %19 = load i32, ptr %18, align 4 + %20 = icmp eq i32 %19, 0 + br i1 %20, label %23, label %21 + +21: ; preds = %17 + %22 = add nsw i32 %12, 1 + store i32 %22, ptr @v, align 4 + br label %23 + +23: ; preds = %17, %21 + %24 = phi i32 [ %12, %17 ], [ %22, %21 ] + %25 = add nuw nsw i64 %10, 1 + %26 = icmp eq i64 %25, %8 + br i1 %26, label %27, label %9 + +27: ; preds = %9, %23 + store i32 %16, ptr @u, align 4 + br label %28 + +28: ; preds = %27, %3 + ret void +}