diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -42,6 +42,7 @@
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
@@ -226,6 +227,18 @@
                                     AAQueryInfo &AAQI) {
   ModRefInfo Result = ModRefInfo::ModRef;
 
+  // Unless the convergent intrinsic carries explicit memory access
+  // attributes, assume that it may touch anything.
+  // TODO: This is overly conservative. We may need to query TTI whether a
+  // particular intrinsic really affects the given memory location.
+  // E.g., some intrinsics may affect only specific address spaces.
+  if (Call->isConvergent() && isa<IntrinsicInst>(Call)) {
+    Function *F = Call->getCalledFunction();
+    if (!(F->onlyReadsMemory() || F->doesNotAccessMemory() ||
+          F->onlyAccessesArgMemory()))
+      return ModRefInfo::ModRef;
+  }
+
   for (const auto &AA : AAs) {
     Result = intersectModRef(Result, AA->getModRefInfo(Call, Loc, AAQI));
 
diff --git a/llvm/test/Analysis/GlobalsModRef/intrinsic_convergent.ll b/llvm/test/Analysis/GlobalsModRef/intrinsic_convergent.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/GlobalsModRef/intrinsic_convergent.ll
@@ -0,0 +1,33 @@
+; RUN: opt -globals-aa -gvn -S < %s | FileCheck %s
+target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+@s = internal local_unnamed_addr addrspace(3) global i32 undef, align 4
+
+; CHECK-LABEL: @bar_sync
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.bar.sync(i32 0)
+; CHECK: load
+define dso_local i32 @bar_sync(i32 %0) local_unnamed_addr {
+  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  tail call void @llvm.nvvm.bar.sync(i32 0)
+  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  ret i32 %2
+}
+
+declare void @llvm.nvvm.bar.sync(i32) #0
+
+; CHECK-LABEL: @barrier0
+; CHECK: store
+; CHECK: tail call void @llvm.nvvm.barrier0()
+; CHECK: load
+define dso_local i32 @barrier0(i32 %0) local_unnamed_addr {
+  store i32 %0, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  tail call void @llvm.nvvm.barrier0()
+  %2 = load i32, i32* addrspacecast (i32 addrspace(3)* @s to i32*), align 4
+  ret i32 %2
+}
+
+declare void @llvm.nvvm.barrier0() #0
+
+attributes #0 = { convergent nounwind }