Index: lib/Target/NVPTX/CMakeLists.txt =================================================================== --- lib/Target/NVPTX/CMakeLists.txt +++ lib/Target/NVPTX/CMakeLists.txt @@ -9,6 +9,7 @@ add_public_tablegen_target(NVPTXCommonTableGen) set(NVPTXCodeGen_sources + NVPTXAliasAnalysis.cpp NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXAssignValidGlobalNames.cpp Index: lib/Target/NVPTX/NVPTXAliasAnalysis.h =================================================================== --- /dev/null +++ lib/Target/NVPTX/NVPTXAliasAnalysis.h @@ -0,0 +1,26 @@ +//===- NVPTXAliasAnalysis.h - NVPTX-specific alias analysis ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for the NVPTX-specific alias analysis. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H +#define LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H + +namespace llvm { + +class ImmutablePass; + +ImmutablePass * +createNVPTXAliasAnalysisPass(const NVPTXTargetMachine *TM = nullptr); + +} // end namespace llvm + +#endif Index: lib/Target/NVPTX/NVPTXAliasAnalysis.cpp =================================================================== --- /dev/null +++ lib/Target/NVPTX/NVPTXAliasAnalysis.cpp @@ -0,0 +1,151 @@ +//===-- NVPTXAliasAnalysis.cpp - NVPTX-specific alias analysis ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an NVPTX-specific alias analysis that determines whether two pointers +// alias based on their address spaces. It reports two pointers do not alias if +// they point to different non-generic address spaces (i.e. global, shared, +// constant, and local). +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXAliasAnalysis.h" +#include "NVPTXTargetMachine.h" +#include "NVPTXUtilities.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "nvptxaa" + +namespace llvm { +void initializeNVPTXAliasAnalysisPass(PassRegistry &); +} + +// Returns the address space of P or P's underlying object if P is generic. We +// check both P and P's underlying object because NVPTXAliasAnalysis is used +// both before and after memory space inference. Before memory space inference, +// non-generic address space attributes are likely on underlying objects such as +// Arguments and GlobalVariables; after memory space inference, the attributes +// are propagated to pointers derived from non-generic objects. +static unsigned getAddressSpace(const Value *P, const DataLayout &DL, + const NVPTXTargetMachine *TM) { + // Fast path: if P is already non-generic, returns its address space. + unsigned AS = P->getType()->getPointerAddressSpace(); + if (AS != ADDRESS_SPACE_GENERIC) + return AS; + + // Slow path: returns the address space of P's underlying object. + SmallVector Objs; + GetUnderlyingObjects(const_cast(P), Objs, DL); + for (auto Obj : Objs) { + // Compute Obj's address space. + unsigned int ObjAS = ADDRESS_SPACE_GENERIC; + if (auto *PTy = dyn_cast(Obj->getType())) { + ObjAS = PTy->getAddressSpace(); + // If Obj is a CUDA kernel parameter, it must reside in global memory. + if (auto A = dyn_cast(Obj)) { + if (TM && TM->getDrvInterface() == NVPTX::CUDA && + isKernelFunction(*A->getParent())) + ObjAS = ADDRESS_SPACE_GLOBAL; + } + // "alloca"s reside in local memory. + if (isa(Obj)) + ObjAS = ADDRESS_SPACE_LOCAL; + } + // Conservatively returns "generic" when any underlying object is generic. + if (ObjAS == ADDRESS_SPACE_GENERIC) + return ADDRESS_SPACE_GENERIC; + // Returns "generic" if P has two non-generic underlying objects and they + // reside in different address spaces. + if (AS != ADDRESS_SPACE_GENERIC && AS != ObjAS) + return ADDRESS_SPACE_GENERIC; + AS = ObjAS; + } + + return AS; +} + +namespace { + +class NVPTXAliasAnalysis : public ImmutablePass, public AliasAnalysis { +public: + static char ID; + + NVPTXAliasAnalysis(const NVPTXTargetMachine *TM = nullptr) + : ImmutablePass(ID), TM(TM) { + initializeNVPTXAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "NVPTX-specific alias analysis"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); + } + + bool doInitialization(Module &M) override { + InitializeAliasAnalysis(this, &M.getDataLayout()); + return true; + } + + AliasResult alias(const MemoryLocation &LocA, + const MemoryLocation &LocB) override { + const Value *V1 = LocA.Ptr; + const Value *V2 = LocB.Ptr; + + // Pointers in two different non-generic address spaces cannot alias. + if (V1->getType()->isPointerTy() && V2->getType()->isPointerTy()) { + auto AS1 = getAddressSpace(V1, *DL, TM); + auto AS2 = getAddressSpace(V2, *DL, TM); + if (AS1 != AS2 && AS1 != ADDRESS_SPACE_GENERIC && + AS2 != ADDRESS_SPACE_GENERIC) { + return NoAlias; + } + } + + // Otherwise, defer to other chained alias analyses. + return AliasAnalysis::alias(LocA, LocB); + } + + // getAdjustedAnalysisPointer - This method is used when a pass implements + // an analysis interface through multiple inheritance. If needed, it + // should override this to adjust the this pointer as needed for the + // specified pass info. + void *getAdjustedAnalysisPointer(const void *ID) override { + if (ID == &AliasAnalysis::ID) + return static_cast(this); + return this; + } + +private: + const NVPTXTargetMachine *TM; +}; + +} // End of anonymous namespace + +// Register this pass, this is required for ImmutablePass. +char NVPTXAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(NVPTXAliasAnalysis, AliasAnalysis, "nvptxaa", + "NVPTX Alias Analysis", false, true, false) + +ImmutablePass *createNVPTXAliasAnalysisPass(const NVPTXTargetMachine *TM) { + return new NVPTXAliasAnalysis(TM); +} Index: lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -50,6 +50,7 @@ namespace llvm { void initializeNVVMReflectPass(PassRegistry&); void initializeGenericToNVVMPass(PassRegistry&); +void initializeNVPTXAliasAnalysisPass(PassRegistry&); void initializeNVPTXAllocaHoistingPass(PassRegistry &); void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); @@ -68,6 +69,7 @@ PassRegistry &PR = *PassRegistry::getPassRegistry(); initializeNVVMReflectPass(PR); initializeGenericToNVVMPass(PR); + initializeNVPTXAliasAnalysisPass(PR); initializeNVPTXAllocaHoistingPass(PR); initializeNVPTXAssignValidGlobalNamesPass(PR); initializeNVPTXFavorNonGenericAddrSpacesPass(PR); Index: test/CodeGen/NVPTX/aa.ll =================================================================== --- /dev/null +++ test/CodeGen/NVPTX/aa.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -nvptxaa -aa-eval -print-no-aliases -disable-output 2>&1 | FileCheck --check-prefix=AA %s +; RUN: opt < %s -nvptxaa -licm -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + +@s = internal unnamed_addr addrspace(3) constant [1024 x double] zeroinitializer, align 8 + +; __global__ void test_alias(double *g, int n) { +; __shared__ double s[1024]; +; for (int i = 0; i < n; ++i) { +; double v = *g; +; s[i] = v; +; } +; } +; +; With NVPTXAliasAnalysis, LICM hoists "v = *g" out of the loop. +define void @licm(double* %g, i32 %n) { +; AA-LABEL: Function: licm: +; CHECK-LABEL: @licm( +entry: + %g2 = addrspacecast double* %g to double addrspace(1)* + br label %loop + +; CHECK: %v = load double, double addrspace(1)* %g2 +; CHECK: loop: +loop: + %i = phi i32 [ 0, %entry ], [ %inc, %loop ] + %s2 = getelementptr inbounds [1024 x double], [1024 x double] addrspace(3)* @s, i64 0, i32 %i + %s3 = addrspacecast double addrspace(3)* %s2 to double* + %v = load double, double addrspace(1)* %g2 + store double %v, double* %s3 + %inc = add nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %exit, label %loop + +exit: + ret void +} +; AA-CHECK: NoAlias: double addrspace(1)* %g2, double* %s3 + +!nvvm.annotations = !{!0} +!0 = !{void (double*, i32)* @licm, !"kernel", i32 1}