Index: llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -0,0 +1,30 @@
+//===-- NVPTXAliasAnalysis.h - NVPTX-specific Alias Analysis ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+
+namespace llvm {
+/// NVPTX-specific alias analysis result. This AAResult understands that
+/// pointers in different, non-generic address spaces cannot alias.
+class NVPTXAAResult : public AAResultBase<NVPTXAAResult> {
+  const Function &F;
+
+public:
+  explicit NVPTXAAResult(const Function &F) : F(F) {}
+
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+
+  // TODO: Implement pointsToConstantMemory.
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
Index: llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -0,0 +1,107 @@
+//===-- NVPTXAliasAnalysis.cpp - NVPTX-specific Alias Analysis -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an NVPTX-specific alias analysis that determines whether two
+// pointers alias based on their address spaces. It reports that two pointers
+// do not alias if they point to different non-generic address spaces (i.e.,
+// global, shared, constant, local, or param).
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAliasAnalysis.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptxaa"
+
+// To make the logic future-proof, we explicitly check for the address spaces
+// the target understands instead of checking "!= ADDRESS_SPACE_GENERIC".
+static bool isSpecificAddressSpace(unsigned AS) {
+  return (AS == ADDRESS_SPACE_GLOBAL || AS == ADDRESS_SPACE_SHARED ||
+          AS == ADDRESS_SPACE_CONST || AS == ADDRESS_SPACE_LOCAL ||
+          AS == ADDRESS_SPACE_PARAM);
+}
+
+// Returns the address space of P, or of P's underlying objects if P is
+// generic. We check both P and its underlying objects because this analysis
+// is used both before and after memory space inference. Before inference,
+// non-generic address spaces typically appear only on underlying objects such
+// as Arguments and GlobalVariables; after inference, they are propagated to
+// the pointers derived from those non-generic objects.
+static unsigned getAddressSpace(const Value *P, const DataLayout &DL) {
+  // Fast path: if P itself is non-generic, return its address space.
+  unsigned AS = P->getType()->getPointerAddressSpace();
+  if (isSpecificAddressSpace(AS))
+    return AS;
+
+  // Slow path: compute the address space from P's underlying objects.
+  SmallVector<Value *, 8> Objs;
+  GetUnderlyingObjects(const_cast<Value *>(P), Objs, DL);
+  for (const Value *Obj : Objs) {
+    // Compute Obj's address space.
+    unsigned ObjAS = ADDRESS_SPACE_GENERIC;
+    if (auto *PTy = dyn_cast<PointerType>(Obj->getType())) {
+      ObjAS = PTy->getAddressSpace();
+      // If Obj is a CUDA kernel parameter, it must reside in global memory.
+      if (auto *A = dyn_cast<Argument>(Obj)) {
+        Triple TargetTriple(A->getParent()->getParent()->getTargetTriple());
+        if (TargetTriple.getOS() == Triple::CUDA &&
+            isKernelFunction(*A->getParent()))
+          ObjAS = ADDRESS_SPACE_GLOBAL;
+      }
+      // Allocas reside in local memory.
+      if (isa<AllocaInst>(Obj))
+        ObjAS = ADDRESS_SPACE_LOCAL;
+    }
+    // Conservatively return "generic" when any underlying object is generic.
+    if (!isSpecificAddressSpace(ObjAS))
+      return ADDRESS_SPACE_GENERIC;
+    // Also return "generic" if P has two non-generic underlying objects that
+    // reside in different address spaces.
+    if (isSpecificAddressSpace(AS) && AS != ObjAS)
+      return ADDRESS_SPACE_GENERIC;
+    AS = ObjAS;
+  }
+
+  return AS;
+}
+
+AliasResult NVPTXAAResult::alias(const MemoryLocation &LocA,
+                                 const MemoryLocation &LocB) {
+  const DataLayout &DL = F.getParent()->getDataLayout();
+
+  const Value *V1 = LocA.Ptr;
+  const Value *V2 = LocB.Ptr;
+
+  // Pointers in two different non-generic address spaces cannot alias.
+  if (V1->getType()->isPointerTy() && V2->getType()->isPointerTy()) {
+    unsigned AS1 = getAddressSpace(V1, DL);
+    unsigned AS2 = getAddressSpace(V2, DL);
+    if (isSpecificAddressSpace(AS1) && isSpecificAddressSpace(AS2) &&
+        AS1 != AS2)
+      return NoAlias;
+  }
+
+  // Otherwise, defer to the other alias analyses chained after this one.
+  return MayAlias;
+}
Index: llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -64,6 +64,9 @@
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None);
 
   void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
+  std::unique_ptr<TTI::AAResultProvider>
+  getAAResultProvider(const Function *F);
 };
 
 } // end namespace llvm
Index: llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "NVPTXTargetTransformInfo.h"
+#include "NVPTXAliasAnalysis.h"
 #include "NVPTXUtilities.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -129,3 +130,19 @@
   UP.Partial = UP.Runtime = true;
   UP.PartialThreshold = UP.Threshold / 4;
 }
+
+namespace {
+class NVPTXAAResultProvider : public TargetTransformInfo::AAResultProvider {
+  NVPTXAAResult Result;
+
+public:
+  NVPTXAAResultProvider(const Function &F) : Result(F) {}
+  virtual ~NVPTXAAResultProvider() {}
+  void addAAResult(AAResults &AAR) override { AAR.addAAResult(Result); }
+};
+} // anonymous namespace
+
+std::unique_ptr<TargetTransformInfo::AAResultProvider>
+NVPTXTTIImpl::getAAResultProvider(const Function *F) {
+  return make_unique<NVPTXAAResultProvider>(*F);
+}
Index: llvm/test/CodeGen/NVPTX/aa.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/aa.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -target-specific-aa -aa-eval -print-no-aliases -print-may-aliases -disable-output 2>&1 \
+; RUN:   | FileCheck --check-prefix=AA %s
+; RUN: opt < %s -target-specific-aa -licm -S | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
"e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +@s = internal unnamed_addr addrspace(3) constant [1024 x double] zeroinitializer, align 8 + +; AA-LABEL: Function: all_addrspaces + +; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32* %generic +; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32 addrspace(2)* %as2 +; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32 addrspace(3)* %shared +; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32 addrspace(4)* %const +; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32 addrspace(5)* %local +; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32 addrspace(6)* %as6 +; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32 addrspace(101)* %param + +; AA-DAG: MayAlias: i32 addrspace(2)* %as2, i32* %generic +; AA-DAG: MayAlias: i32 addrspace(2)* %as2, i32 addrspace(3)* %shared +; AA-DAG: MayAlias: i32 addrspace(2)* %as2, i32 addrspace(4)* %const +; AA-DAG: MayAlias: i32 addrspace(2)* %as2, i32 addrspace(5)* %local +; AA-DAG: MayAlias: i32 addrspace(2)* %as2, i32 addrspace(6)* %as6 + +; AA-DAG: MayAlias: i32 addrspace(3)* %shared, i32* %generic +; AA-DAG: NoAlias: i32 addrspace(3)* %shared, i32 addrspace(4)* %const +; AA-DAG: NoAlias: i32 addrspace(3)* %shared, i32 addrspace(5)* %local +; AA-DAG: MayAlias: i32 addrspace(3)* %shared, i32 addrspace(6)* %as6 + +; AA-DAG: MayAlias: i32 addrspace(4)* %const, i32* %generic +; AA-DAG: NoAlias: i32 addrspace(4)* %const, i32 addrspace(5)* %local +; AA-DAG: MayAlias: i32 addrspace(4)* %const, i32 addrspace(6)* %as6 + +; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %generic +; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32 addrspace(6)* %as6 + +; AA-DAG: MayAlias: i32 addrspace(6)* %as6, i32* %generic + +; AA-DAG: MayAlias: i32 addrspace(101)* %param, i32* %generic +; AA-DAG: MayAlias: i32 addrspace(101)* %param, i32 addrspace(2)* %as2 +; AA-DAG: NoAlias: i32 addrspace(101)* %param, i32 addrspace(3)* %shared +; AA-DAG: NoAlias: i32 addrspace(101)* %param, i32 addrspace(4)* %const +; AA-DAG: NoAlias: i32 addrspace(101)* %param, i32 addrspace(5)* %local +; AA-DAG: MayAlias: i32 addrspace(101)* %param, i32 addrspace(6)* %as6 +define void @all_addrspaces( + i32* %generic, + i32 addrspace(1)* %global, + i32 addrspace(2)* %as2, + i32 addrspace(3)* %shared, + i32 addrspace(4)* %const, + i32 addrspace(5)* %local, + i32 addrspace(6)* %as6, + i32 addrspace(101)* %param) { + ret void +} + +declare i32 addrspace(1)* @get_global(i32*) +declare i32 addrspace(5)* @get_local(i32*) +declare i32* @get_generic(i32*) + +; Check that we infer that alloca's are in the local address space. This means +; that the pointer returned by get_global can't alias the result of an alloca. +; +; AA-LABEL: Function: infer_as_from_alloca +; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32* %a +; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32* %gep +; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %a +; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %gep +define void @infer_as_from_alloca() { + %a = alloca i32 + %gep = getelementptr i32, i32* %a, i32 42 + %global = call i32 addrspace(1)* @get_global(i32* %a) + %local = call i32 addrspace(5)* @get_local(i32 *%a) + ret void +} + +; Check that our rules are correct and conservative when we "merge" pointers of +; different (inferred) address spaces. 
+;
+; AA-LABEL: Function: infer_through_select_kernel
+; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %a_or_b
+; AA-DAG: NoAlias: i32 addrspace(1)* %global, i32* %a_or_b
+; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32* %kernel_param
+; AA-DAG: NoAlias: i32 addrspace(5)* %local, i32* %kernel_param1
+; AA-DAG: NoAlias: i32 addrspace(5)* %local, i32* %kernel_param
+; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32* %kernel_param_or_alloca
+; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %kernel_param_or_alloca
+; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32* %kernel_param_or_generic
+; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %kernel_param_or_generic
+; AA-DAG: MayAlias: i32 addrspace(1)* %global, i32* %alloca_or_generic
+; AA-DAG: MayAlias: i32 addrspace(5)* %local, i32* %alloca_or_generic
+define void @infer_through_select_kernel(
+    i1 %cond1, i1 %cond2, i1 %cond3, i1 %cond4, i1 %cond5,
+    i32* %kernel_param1, i32* %kernel_param2) {
+  %a = alloca i32
+  %b = alloca i32
+  %a_or_b = select i1 %cond1, i32* %a, i32* %b ; local AS
+
+  %global = call i32 addrspace(1)* @get_global(i32* %a)
+  %local = call i32 addrspace(5)* @get_local(i32* %a)
+  %generic = call i32* @get_generic(i32* %a)
+
+  %kernel_param = select i1 %cond2, i32* %kernel_param1, i32* %kernel_param2 ; global AS
+  %kernel_param_or_alloca = select i1 %cond3, i32* %kernel_param1, i32* %a ; generic AS
+  %kernel_param_or_generic = select i1 %cond4, i32* %kernel_param, i32* %generic ; generic AS
+  %alloca_or_generic = select i1 %cond5, i32* %a, i32* %generic ; generic AS
+  ret void
+}
+
+; __global__ void test_alias(double *g, int n) {
+;   __shared__ double s[1024];
+;   for (int i = 0; i < n; ++i) {
+;     double v = *g;
+;     s[i] = v;
+;   }
+; }
+;
+; With NVPTXAliasAnalysis, LICM hoists "v = *g" out of the loop.
define void @licm(double* %g, i32 %n) {
+; AA-LABEL: Function: licm:
+; CHECK-LABEL: @licm(
+entry:
+  br label %loop
+
+; CHECK: %v = load double, double* %g
+; CHECK: loop:
+loop:
+  %i = phi i32 [ 0, %entry ], [ %inc, %loop ]
+  %s2 = getelementptr inbounds [1024 x double], [1024 x double] addrspace(3)* @s, i64 0, i32 %i
+  %s3 = addrspacecast double addrspace(3)* %s2 to double*
+  %v = load double, double* %g
+  store double %v, double* %s3
+  %inc = add nsw i32 %i, 1
+  %exitcond = icmp eq i32 %inc, %n
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  ret void
+}
+; AA: NoAlias: double* %g, double* %s3
+
+!nvvm.annotations = !{!0, !1}
+!0 = !{void (double*, i32)* @licm, !"kernel", i32 1}
+!1 = !{void (i1, i1, i1, i1, i1, i32*, i32*)* @infer_through_select_kernel, !"kernel", i32 1}