diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -420,11 +420,12 @@ // // Note these are to support the Objective-C ARC optimizer which wants to // eliminate retain and releases where possible. +// TODO: Annotate the intrinsics properly, consider DefaultAttrsIntrinsic. def int_objc_autorelease : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty]>; -def int_objc_autoreleasePoolPop : Intrinsic<[], [llvm_ptr_ty]>; -def int_objc_autoreleasePoolPush : Intrinsic<[llvm_ptr_ty], []>; +def int_objc_autoreleasePoolPop : Intrinsic<[], [llvm_ptr_ty], [IntrNoSync, IntrNoCallback]>; +def int_objc_autoreleasePoolPush : Intrinsic<[llvm_ptr_ty], [], [IntrNoSync, IntrNoCallback]>; def int_objc_autoreleaseReturnValue : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty]>; def int_objc_copyWeak : Intrinsic<[], diff --git a/llvm/lib/Analysis/GlobalsModRef.cpp b/llvm/lib/Analysis/GlobalsModRef.cpp --- a/llvm/lib/Analysis/GlobalsModRef.cpp +++ b/llvm/lib/Analysis/GlobalsModRef.cpp @@ -511,6 +511,18 @@ Handles.front().I = Handles.begin(); bool KnowNothing = false; + // Intrinsics, like any other synchronizing function, can make effects + // of other threads visible. Without nosync we know nothing really. + // Similarly, if `nocallback` is missing the function, or intrinsic, + // can call into the module arbitrarily. If both are set the function + // has an effect but will not interact with accesses of internal + // globals inside the module. We are conservative here for optnone + // functions, might not be necessary. + auto MaySyncOrCallIntoModule = [](const Function &F) { + return !F.isDeclaration() || !F.hasNoSync() || + !F.hasFnAttribute(Attribute::NoCallback); + }; + // Collect the mod/ref properties due to called functions. We only compute // one mod-ref set. for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { @@ -525,7 +537,7 @@ // Can't do better than that! 
} else if (F->onlyReadsMemory()) { FI.addModRefInfo(ModRefInfo::Ref); - if (!F->isIntrinsic() && !F->onlyAccessesArgMemory()) + if (!F->onlyAccessesArgMemory() && MaySyncOrCallIntoModule(*F)) // This function might call back into the module and read a global - // consider every global as possibly being read by this function. FI.setMayReadAnyGlobal(); @@ -533,7 +545,7 @@ FI.addModRefInfo(ModRefInfo::ModRef); if (!F->onlyAccessesArgMemory()) FI.setMayReadAnyGlobal(); - if (!F->isIntrinsic()) { + if (MaySyncOrCallIntoModule(*F)) { KnowNothing = true; break; } diff --git a/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/GlobalsModRef/functions_without_nosync.ll @@ -0,0 +1,38 @@ +; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s +; +; Functions w/o `nosync` attribute may communicate via memory and must be +; treated conservatively. Taken from https://reviews.llvm.org/D115302. 
+ +target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4 + +; CHECK-LABEL: @bar_sync +; CHECK: store +; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0) +; CHECK: load +define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr { + store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 + tail call void @llvm.nvvm.bar.sync(i32 0) + %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 + ret i32 %2 +} + +declare void @llvm.nvvm.bar.sync(i32) #0 + +; CHECK-LABEL: @barrier0 +; CHECK: store +; CHECK: tail call void @llvm.nvvm.barrier0() +; CHECK: load +define dso_local i32 @barrier0(i32 %0) local_unnamed_addr { + store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 + tail call void @llvm.nvvm.barrier0() + %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4 + ret i32 %2 +} + +declare void @llvm.nvvm.barrier0() #0 + +attributes #0 = { convergent nounwind } diff --git a/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/GlobalsModRef/nosync_nocallback.ll @@ -0,0 +1,133 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes +; RUN: opt -aa-pipeline=basic-aa,globals-aa -passes='require<globals-aa>,gvn' -S < %s | FileCheck %s + +; Make sure we do not hoist the load before the intrinsic, unknown function, or +; optnone function except if we know the unknown function is nosync and nocallback. 
+ +@G1 = internal global i32 undef +@G2 = internal global i32 undef +@G3 = internal global i32 undef +@G4 = internal global i32 undef + +define void @test_barrier(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@test_barrier +; CHECK-SAME: (i1 [[C:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]] +; CHECK: init: +; CHECK-NEXT: store i32 0, ptr @G1, align 4 +; CHECK-NEXT: br label [[CHECK]] +; CHECK: check: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G1, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + br i1 %c, label %init, label %check +init: + store i32 0, ptr @G1 + br label %check +check: + call void @llvm.amdgcn.s.barrier() + %v = load i32, ptr @G1 + %cmp = icmp eq i32 %v, 0 + call void @llvm.assume(i1 %cmp) + ret void +} + +define void @test_unknown(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@test_unknown +; CHECK-SAME: (i1 [[C:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]] +; CHECK: init: +; CHECK-NEXT: store i32 0, ptr @G2, align 4 +; CHECK-NEXT: br label [[CHECK]] +; CHECK: check: +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G2, align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + br i1 %c, label %init, label %check +init: + store i32 0, ptr @G2 + br label %check +check: + call void @unknown() + %v = load i32, ptr @G2 + %cmp = icmp eq i32 %v, 0 + call void @llvm.assume(i1 %cmp) + ret void +} + +define void @test_optnone(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@test_optnone +; CHECK-SAME: (i1 [[C:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[CHECK:%.*]] +; CHECK: init: +; CHECK-NEXT: store i32 0, ptr @G3, align 4 +; CHECK-NEXT: br label [[CHECK]] +; CHECK: check: +; CHECK-NEXT: call void @optnone() +; CHECK-NEXT: [[V:%.*]] = load i32, ptr @G3, 
align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + br i1 %c, label %init, label %check +init: + store i32 0, ptr @G3 + br label %check +check: + call void @optnone() + %v = load i32, ptr @G3 + %cmp = icmp eq i32 %v, 0 + call void @llvm.assume(i1 %cmp) + ret void +} + +define void @optnone() optnone nosync nocallback noinline { +; CHECK: Function Attrs: nocallback noinline nosync optnone +; CHECK-LABEL: define {{[^@]+}}@optnone +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; + ret void +} + +; Here hoisting is legal and we use it to verify it will happen. +define void @test_unknown_annotated(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@test_unknown_annotated +; CHECK-SAME: (i1 [[C:%.*]]) { +; CHECK-NEXT: br i1 [[C]], label [[INIT:%.*]], label [[DOTCHECK_CRIT_EDGE:%.*]] +; CHECK: .check_crit_edge: +; CHECK-NEXT: [[V_PRE:%.*]] = load i32, ptr @G4, align 4 +; CHECK-NEXT: br label [[CHECK:%.*]] +; CHECK: init: +; CHECK-NEXT: store i32 0, ptr @G4, align 4 +; CHECK-NEXT: br label [[CHECK]] +; CHECK: check: +; CHECK-NEXT: [[V:%.*]] = phi i32 [ [[V_PRE]], [[DOTCHECK_CRIT_EDGE]] ], [ 0, [[INIT]] ] +; CHECK-NEXT: call void @unknown_nosync_nocallback() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret void +; + br i1 %c, label %init, label %check +init: + store i32 0, ptr @G4 + br label %check +check: + call void @unknown_nosync_nocallback() + %v = load i32, ptr @G4 + %cmp = icmp eq i32 %v, 0 + call void @llvm.assume(i1 %cmp) + ret void +} + +declare void @unknown() +declare void @unknown_nosync_nocallback() nosync nocallback +declare void @llvm.amdgcn.s.barrier() +declare void @llvm.assume(i1 noundef) + diff --git a/llvm/test/Transforms/ObjCARC/basic.ll b/llvm/test/Transforms/ObjCARC/basic.ll --- a/llvm/test/Transforms/ObjCARC/basic.ll +++ b/llvm/test/Transforms/ObjCARC/basic.ll @@ -3074,5 +3074,6 @@ !5 = 
!{i32 2, !"Debug Info Version", i32 3} ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #1 = { nocallback nosync nounwind } +; CHECK: attributes #2 = { nocallback nofree nosync nounwind readnone speculatable willreturn } ; CHECK: ![[RELEASE]] = !{}