diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -1476,25 +1476,100 @@ return Changed; } -// Infer the nosync attribute. For the moment, the inference is trivial -// and relies on the readnone attribute already being infered. This will -// be replaced with a more robust implementation in the near future. +// Return true if this is an atomic which has an ordering stronger than +// unordered. Note that this is different than the predicate we use in +// Attributor. Here we chose to be conservative and consider monotonic +// operations potentially synchronizing. We generally don't do much with +// monotonic operations, so this is simply risk reduction. +static bool isOrderedAtomic(Instruction *I) { + if (!I->isAtomic()) + return false; + + if (auto *FI = dyn_cast<FenceInst>(I)) + // All legal orderings for fence are stronger than monotonic. + return FI->getSyncScopeID() != SyncScope::SingleThread; + else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I)) + return true; + else if (auto *SI = dyn_cast<StoreInst>(I)) + return !SI->isUnordered(); + else if (auto *LI = dyn_cast<LoadInst>(I)) + return !LI->isUnordered(); + else { + llvm_unreachable("unknown atomic instruction?"); + } +} + +static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) { + // Volatile may synchronize + if (I.isVolatile()) + return true; + + // An ordered atomic may synchronize. (See comment above on monotonic.) + if (isOrderedAtomic(&I)) + return true; + + auto *CB = dyn_cast<CallBase>(&I); + if (!CB) + // Non call site cases covered by the two checks above + return false; + + if (CB->hasFnAttr(Attribute::NoSync)) + return false; + + // readnone + not convergent implies nosync + // (This is needed to initialize inference from declarations which aren't + // explicitly nosync, but are readnone and not convergent.) 
+ if (CB->hasFnAttr(Attribute::ReadNone) && + !CB->hasFnAttr(Attribute::Convergent)) + return false; + + // Non volatile memset/memcpy/memmoves are nosync + // NOTE: Only intrinsics with volatile flags should be handled here. All + // others should be marked in Intrinsics.td. + if (auto *MI = dyn_cast<MemIntrinsic>(&I)) + if (!MI->isVolatile()) + return false; + + // Speculatively assume in SCC. + if (Function *Callee = CB->getCalledFunction()) + if (SCCNodes.contains(Callee)) + return false; + + return true; +} + +// Infer the nosync attribute. static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) { - bool Changed = false; + AttributeInferer AI; + AI.registerAttrInference(AttributeInferer::InferenceDescriptor{ + Attribute::NoSync, + // Skip already marked functions. + [](const Function &F) { return F.hasNoSync(); }, + // Instructions that break nosync assumption. + [&SCCNodes](Instruction &I) { + return InstrBreaksNoSync(I, SCCNodes); + }, + [](Function &F) { + LLVM_DEBUG(dbgs() + << "Adding nosync attr to fn " << F.getName() << "\n"); + F.setNoSync(); + ++NumNoSync; + }, + /* RequiresExactDefinition= */ true}); + bool Changed = AI.run(SCCNodes); + // readnone + not convergent implies nosync + // (This is here so that we don't have to duplicate the function local + // memory reasoning of the readnone analysis.) 
for (Function *F : SCCNodes) { if (!F || F->hasNoSync()) continue; - - // readnone + not convergent implies nosync if (!F->doesNotAccessMemory() || F->isConvergent()) continue; - F->setNoSync(); NumNoSync++; Changed = true; } - return Changed; } diff --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll --- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll +++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll @@ -73,12 +73,12 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind ; CHECK: attributes #0 = { norecurse nosync nounwind readnone willreturn } -; CHECK: attributes #1 = { nofree norecurse nounwind willreturn writeonly } +; CHECK: attributes #1 = { nofree norecurse nosync nounwind willreturn writeonly } ; CHECK: attributes #2 = { nounwind readonly } ; CHECK: attributes #3 = { nounwind } ; CHECK: attributes #4 = { nosync nounwind readnone willreturn } -; CHECK: attributes #5 = { nofree nounwind willreturn } -; CHECK: attributes #6 = { nofree norecurse nounwind willreturn } +; CHECK: attributes #5 = { nofree nosync nounwind willreturn } +; CHECK: attributes #6 = { nofree norecurse nosync nounwind willreturn } ; CHECK: attributes #7 = { argmemonly nofree nosync nounwind willreturn } ; Root note. 
diff --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll --- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll +++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll @@ -7,13 +7,13 @@ ; GCN: %mul.i = fmul float %load, 1.500000e+01 ; UNSAFE: attributes #0 = { norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" } -; UNSAFE: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" } +; UNSAFE: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" } ; NOINFS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" } -; NOINFS: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } +; NOINFS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" } ; NONANS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" } -; NONANS: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } +; NONANS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" } define float @foo(float %x) #0 { entry: diff --git a/llvm/test/Transforms/FunctionAttrs/nofree.ll b/llvm/test/Transforms/FunctionAttrs/nofree.ll --- a/llvm/test/Transforms/FunctionAttrs/nofree.ll +++ b/llvm/test/Transforms/FunctionAttrs/nofree.ll @@ -36,7 +36,7 @@ declare void @free(i8* nocapture) local_unnamed_addr #2 define i32 @_Z4foo3Pi(i32* nocapture readonly %a) 
local_unnamed_addr #3 { -; CHECK: Function Attrs: norecurse nounwind readonly uwtable willreturn +; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn ; CHECK-LABEL: @_Z4foo3Pi( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4 diff --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll --- a/llvm/test/Transforms/FunctionAttrs/nosync.ll +++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll @@ -93,7 +93,7 @@ ; singlethread fences are okay define void @test9(i8* %p) { -; CHECK: Function Attrs: nofree norecurse nounwind willreturn +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn ; CHECK-LABEL: @test9( ; CHECK-NEXT: fence syncscope("singlethread") seq_cst ; CHECK-NEXT: ret void @@ -137,7 +137,7 @@ } define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: norecurse nounwind readonly uwtable willreturn +; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn ; CHECK-LABEL: @load_unordered( ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] unordered, align 4 ; CHECK-NEXT: ret i32 [[TMP2]] @@ -148,7 +148,7 @@ ; atomic store with unordered ordering. 
define void @store_unordered(i32* nocapture %0) norecurse nounwind uwtable { -; CHECK: Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly +; CHECK: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn writeonly ; CHECK-LABEL: @store_unordered( ; CHECK-NEXT: store atomic i32 10, i32* [[TMP0:%.*]] unordered, align 4 ; CHECK-NEXT: ret void @@ -209,9 +209,9 @@ declare void @nosync_function() noinline nounwind uwtable nosync define void @call_nosync_function() nounwind uwtable noinline { -; CHECK: Function Attrs: noinline nounwind uwtable +; CHECK: Function Attrs: noinline nosync nounwind uwtable ; CHECK-LABEL: @call_nosync_function( -; CHECK-NEXT: tail call void @nosync_function() #[[ATTR7:[0-9]+]] +; CHECK-NEXT: tail call void @nosync_function() #[[ATTR8:[0-9]+]] ; CHECK-NEXT: ret void ; tail call void @nosync_function() noinline nounwind uwtable @@ -225,7 +225,7 @@ define void @call_might_sync() nounwind uwtable noinline { ; CHECK: Function Attrs: noinline nounwind uwtable ; CHECK-LABEL: @call_might_sync( -; CHECK-NEXT: tail call void @might_sync() #[[ATTR7]] +; CHECK-NEXT: tail call void @might_sync() #[[ATTR8]] ; CHECK-NEXT: ret void ; tail call void @might_sync() noinline nounwind uwtable @@ -248,7 +248,7 @@ ; positive, non-volatile intrinsic. 
define i32 @memset_non_volatile(i8* %ptr1, i8 %val) { -; CHECK: Function Attrs: nofree nounwind willreturn writeonly +; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly ; CHECK-LABEL: @memset_non_volatile( ; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false) ; CHECK-NEXT: ret i32 4 diff --git a/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll b/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll --- a/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll +++ b/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll @@ -14,4 +14,4 @@ } -; CHECK: attributes #0 = { nofree nounwind } +; CHECK: attributes #0 = { nofree nosync nounwind } diff --git a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll --- a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll +++ b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll @@ -17,4 +17,4 @@ ret void } -; CHECK: attributes #0 = { nofree nounwind } +; CHECK: attributes #0 = { nofree nosync nounwind } diff --git a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll --- a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll +++ b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll @@ -52,5 +52,5 @@ !28 = !DILocation(line: 9, column: 18, scope: !2) !29 = !DILocation(line: 10, column: 1, scope: !2) -; CHECK: attributes #0 = { nofree norecurse nounwind willreturn } +; CHECK: attributes #0 = { nofree norecurse nosync nounwind willreturn } ; CHECK-NOT: foo.coefficient1