diff --git a/llvm/lib/Target/NVPTX/CMakeLists.txt b/llvm/lib/Target/NVPTX/CMakeLists.txt
--- a/llvm/lib/Target/NVPTX/CMakeLists.txt
+++ b/llvm/lib/Target/NVPTX/CMakeLists.txt
@@ -11,6 +11,7 @@
 add_public_tablegen_target(NVPTXCommonTableGen)
 
 set(NVPTXCodeGen_sources
+  NVPTXAliasAnalysis.cpp
   NVPTXAllocaHoisting.cpp
   NVPTXAtomicLower.cpp
   NVPTXAsmPrinter.cpp
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -0,0 +1,115 @@
+//===-------------------- NVPTXAliasAnalysis.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the NVPTX address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+
+namespace llvm {
+
+class DataLayout;
+class MemoryLocation;
+
+/// Alias analysis result for NVPTX. Queries are answered from the address
+/// spaces of the pointers involved.
+class NVPTXAAResult : public AAResultBase {
+  friend AAResultBase;
+
+  const DataLayout &DL;
+
+public:
+  explicit NVPTXAAResult(const DataLayout &DL) : DL(DL) {}
+  NVPTXAAResult(NVPTXAAResult &&Arg)
+      : AAResultBase(std::move(Arg)), DL(Arg.DL) {}
+
+  /// Handle invalidation events from the new pass manager.
+  ///
+  /// By definition, this result is stateless and so remains valid.
+  bool invalidate(Function &, const PreservedAnalyses &,
+                  FunctionAnalysisManager::Invalidator &Inv) {
+    return false;
+  }
+
+  /// Returns MayAlias if either location is in the generic address space or
+  /// both are in the same address space, and NoAlias otherwise.
+  AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+                    AAQueryInfo &AAQI);
+
+  /// Returns true if \p Loc's pointer, or the underlying object it is based
+  /// on, is in the const or param address space; otherwise defers to
+  /// AAResultBase.
+  bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+                              bool OrLocal);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class NVPTXAA : public AnalysisInfoMixin<NVPTXAA> {
+  friend AnalysisInfoMixin<NVPTXAA>;
+
+  static AnalysisKey Key;
+
+public:
+  using Result = NVPTXAAResult;
+
+  NVPTXAAResult run(Function &F, AnalysisManager<Function> &AM) {
+    return NVPTXAAResult(F.getParent()->getDataLayout());
+  }
+};
+
+/// Legacy wrapper pass to provide the NVPTXAAResult object.
+class NVPTXAAWrapperPass : public ImmutablePass {
+  std::unique_ptr<NVPTXAAResult> Result;
+
+public:
+  static char ID;
+
+  NVPTXAAWrapperPass();
+
+  /// The result exists only between doInitialization() and doFinalization().
+  NVPTXAAResult &getResult() { return *Result; }
+  const NVPTXAAResult &getResult() const { return *Result; }
+
+  bool doInitialization(Module &M) override {
+    Result.reset(new NVPTXAAResult(M.getDataLayout()));
+    return false;
+  }
+
+  bool doFinalization(Module &M) override {
+    Result.reset();
+    return false;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+// Wrapper around ExternalAAWrapperPass so that the default
+// constructor gets the callback.
+class NVPTXExternalAAWrapper : public ExternalAAWrapperPass {
+public:
+  static char ID;
+
+  NVPTXExternalAAWrapper()
+      : ExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+          if (auto *WrapperPass =
+                  P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
+            AAR.addAAResult(WrapperPass->getResult());
+        }) {}
+};
+
+ImmutablePass *createNVPTXAAWrapperPass();
+void initializeNVPTXAAWrapperPassPass(PassRegistry &);
+ImmutablePass *createNVPTXExternalAAWrapperPass();
+void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -0,0 +1,88 @@
+//===--------------------- NVPTXAliasAnalysis.cpp--------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the NVPTX address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAliasAnalysis.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "NVPTX-aa"
+
+AnalysisKey NVPTXAA::Key;
+
+char NVPTXAAWrapperPass::ID = 0;
+char NVPTXExternalAAWrapper::ID = 0;
+
+INITIALIZE_PASS(NVPTXAAWrapperPass, "nvptx-aa",
+                "NVPTX Address space based Alias Analysis", false, true)
+
+INITIALIZE_PASS(NVPTXExternalAAWrapper, "nvptx-aa-wrapper",
+                "NVPTX Address space based Alias Analysis Wrapper", false, true)
+
+ImmutablePass *llvm::createNVPTXAAWrapperPass() {
+  return new NVPTXAAWrapperPass();
+}
+
+ImmutablePass *llvm::createNVPTXExternalAAWrapperPass() {
+  return new NVPTXExternalAAWrapper();
+}
+
+NVPTXAAWrapperPass::NVPTXAAWrapperPass() : ImmutablePass(ID) {
+  initializeNVPTXAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void NVPTXAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+}
+
+/// Alias rule for a pair of address spaces: the generic address space may
+/// alias anything, identical address spaces may alias each other, and two
+/// different non-generic address spaces never alias.
+static AliasResult::Kind getAliasResult(unsigned AS1, unsigned AS2) {
+  if ((AS1 == ADDRESS_SPACE_GENERIC) || (AS2 == ADDRESS_SPACE_GENERIC))
+    return AliasResult::MayAlias;
+  return (AS1 == AS2 ? AliasResult::MayAlias : AliasResult::NoAlias);
+}
+
+AliasResult NVPTXAAResult::alias(const MemoryLocation &Loc1,
+                                 const MemoryLocation &Loc2,
+                                 AAQueryInfo &AAQI) {
+  unsigned AS1 = Loc1.Ptr->getType()->getPointerAddressSpace();
+  unsigned AS2 = Loc2.Ptr->getType()->getPointerAddressSpace();
+
+  return getAliasResult(AS1, AS2);
+}
+
+/// Returns true if \p AS is the const or the param address space.
+static bool isConstOrParam(unsigned AS) {
+  return AS == AddressSpace::ADDRESS_SPACE_CONST ||
+         AS == AddressSpace::ADDRESS_SPACE_PARAM;
+}
+
+bool NVPTXAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
+                                           AAQueryInfo &AAQI, bool OrLocal) {
+  // First check the address space of the pointer being queried.
+  unsigned AS = Loc.Ptr->getType()->getPointerAddressSpace();
+  if (isConstOrParam(AS))
+    return true;
+
+  // Then check the address space of the object the pointer is based on.
+  const Value *Base = getUnderlyingObject(Loc.Ptr);
+  AS = Base->getType()->getPointerAddressSpace();
+  if (isConstOrParam(AS))
+    return true;
+
+  return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -63,6 +63,7 @@
   }
 
   void adjustPassManager(PassManagerBuilder &) override;
+  void registerDefaultAliasAnalyses(AAManager &AAM) override;
   void registerPassBuilderCallbacks(PassBuilder &PB) override;
 
   TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -12,6 +12,7 @@
 
 #include "NVPTXTargetMachine.h"
 #include "NVPTX.h"
+#include "NVPTXAliasAnalysis.h"
 #include "NVPTXAllocaHoisting.h"
 #include "NVPTXAtomicLower.h"
 #include "NVPTXLowerAggrCopies.h"
@@ -73,6 +74,8 @@
 void initializeNVPTXLowerArgsPass(PassRegistry &);
 void initializeNVPTXLowerAllocaPass(PassRegistry &);
 void initializeNVPTXProxyRegErasurePass(PassRegistry &);
+void initializeNVPTXAAWrapperPassPass(PassRegistry &);
+void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
 
 } // end namespace llvm
 
@@ -94,6 +97,8 @@
   initializeNVPTXLowerAllocaPass(PR);
   initializeNVPTXLowerAggrCopiesPass(PR);
   initializeNVPTXProxyRegErasurePass(PR);
+  initializeNVPTXAAWrapperPassPass(PR);
+  initializeNVPTXExternalAAWrapperPass(PR);
 }
 
 static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -203,11 +208,17 @@
 
 void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
   Builder.addExtension(
-    PassManagerBuilder::EP_EarlyAsPossible,
-    [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
-      PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
-      PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
-    });
+      PassManagerBuilder::EP_EarlyAsPossible,
+      [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
+        PM.add(createNVPTXAAWrapperPass());
+        PM.add(createNVPTXExternalAAWrapperPass());
+        PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
+        PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
+      });
+}
+
+void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
+  AAM.registerFunctionAnalysis<NVPTXAA>();
 }
 
 void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
@@ -225,6 +236,18 @@
         return false;
       });
 
+  PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
+    FAM.registerPass([&] { return NVPTXAA(); });
+  });
+
+  PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
+    if (AAName == "nvptx-aa") {
+      AAM.registerFunctionAnalysis<NVPTXAA>();
+      return true;
+    }
+    return false;
+  });
+
   PB.registerPipelineStartEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
         FunctionPassManager FPM;
@@ -310,6 +333,12 @@
   disablePass(&PatchableFunctionID);
   disablePass(&ShrinkWrapID);
 
+  addPass(createNVPTXAAWrapperPass());
+  addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+    if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
+      AAR.addAAResult(WrapperPass->getResult());
+  }));
+
   // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
   // it here does nothing. But since we need it for correctness when lowering
   // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
diff --git a/llvm/test/CodeGen/NVPTX/nvptx-aa.ll b/llvm/test/CodeGen/NVPTX/nvptx-aa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/nvptx-aa.ll
@@ -0,0 +1,110 @@
+; RUN: opt -nvptx-aa -nvptx-aa-wrapper -aa-eval -print-all-alias-modref-info < %s -S 2>&1 \
+; RUN:   | FileCheck %s --check-prefixes CHECK-ALIAS
+;
+; RUN: opt -nvptx-aa -nvptx-aa-wrapper -licm < %s -S | FileCheck %s --check-prefixes CHECK-AA-CONST
+; RUN: opt -basic-aa -licm < %s -S | FileCheck %s --check-prefixes CHECK-NOAA-CONST
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-ALIAS-LABEL: Function: test
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(1)* %global
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(3)* %shared
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(3)* %shared
+; CHECK-ALIAS: MayAlias: i8 addrspace(4)* %const, i8* %gen
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(1)* %global
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(3)* %shared
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(5)* %local
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(5)* %local
+; CHECK-ALIAS: NoAlias: i8 addrspace(5)* %local, i8 addrspace(3)* %shared
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(5)* %local
+
+define i8 @test_alias(i8* %gen, i8 addrspace(1)* %global, i8 addrspace(3)* %shared, i8 addrspace(4)* %const, i8 addrspace(5)* %local) {
+  %param = addrspacecast i8* %gen to i8 addrspace(101)*
+  %v1 = load i8, i8* %gen
+  %v2 = load i8, i8 addrspace(1)* %global
+  %v3 = load i8, i8 addrspace(3)* %shared
+  %v4 = load i8, i8 addrspace(4)* %const
+  %v5 = load i8, i8 addrspace(5)* %local
+  %v6 = load i8, i8 addrspace(101)* %param
+  %res1 = add i8 %v1, %v2
+  %res2 = add i8 %res1, %v3
+  %res3 = add i8 %res2, %v4
+  %res4 = add i8 %res3, %v5
+  %res5 = add i8 %res4, %v6
+  ret i8 %res5
+}
+
+; CHECK-ALIAS-LABEL: Function: test_const
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(1)* %global
+; CHECK-ALIAS: NoAlias: i8 addrspace(4)* %const, i8 addrspace(1)* %global
+; CHECK-ALIAS: MayAlias: i8 addrspace(4)* %const, i8* %gen
+;
+define i8 @test_const(i8* %gen, i8 addrspace(1)* %global, i8 addrspace(4)* %const) {
+;
+; Even though %gen and %const may alias and there is a store to %gen,
+; LICM should be able to hoist %load_const because it is known to be
+; constant (AA::pointsToConstantMemory()).
+;
+; CHECK-AA-CONST-LABEL: @test_const
+; CHECK-AA-CONST-LABEL: entry
+; CHECK-AA-CONST: %[[LOAD_CONST:.+]] = load i8, i8 addrspace(4)*
+; CHECK-AA-CONST-LABEL: loop
+; CHECK-AA-CONST: add {{.*}}%[[LOAD_CONST]]
+;
+; Without NVPTX AA the load is left in the loop because we assume that
+; it may be clobbered by the store.
+;
+; CHECK-NOAA-CONST-LABEL: @test_const
+; CHECK-NOAA-CONST-LABEL: loop
+; CHECK-NOAA-CONST: %[[LOAD_CONST:.+]] = load i8, i8 addrspace(4)*
+; CHECK-NOAA-CONST: add {{.*}}%[[LOAD_CONST]]
+entry:
+  br label %loop
+loop:
+  %v = phi i8 [0, %entry], [%v2, %loop]
+  %load_global = load i8, i8 addrspace(1)* %global
+  store i8 %load_global, i8* %gen
+  %load_const = load i8, i8 addrspace(4)* %const
+  %v2 = add i8 %v, %load_const
+  %cond = icmp eq i8 %load_const, 0
+  br i1 %cond, label %done, label %loop
+done:
+  ret i8 %v2
+}
+
+; Same as @test_const above, but for param space. Check that the load
+; from param space is hoisted out of the loop when NVPTX-AA is
+; enabled.
+;
+; CHECK-ALIAS-LABEL: Function: test_param
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(1)* %global
+; CHECK-ALIAS: NoAlias: i8 addrspace(1)* %global, i8 addrspace(101)* %param
+; CHECK-ALIAS: MayAlias: i8* %gen, i8 addrspace(101)* %param
+;
+define i8 @test_param(i8* %gen, i8 addrspace(1)* %global, i8* %param_gen) {
+; With NVPTX AA the param-space load is expected in the entry block, before the loop.
+; CHECK-AA-CONST-LABEL: @test_param
+; CHECK-AA-CONST-LABEL: entry
+; CHECK-AA-CONST: %[[LOAD_PARAM:.+]] = load i8, i8 addrspace(101)*
+; CHECK-AA-CONST-LABEL: loop
+; CHECK-AA-CONST: add {{.*}}%[[LOAD_PARAM]]
+; Without NVPTX AA (basic-aa only) the load is expected to remain inside the loop.
+; CHECK-NOAA-CONST-LABEL: @test_param
+; CHECK-NOAA-CONST-LABEL: loop
+; CHECK-NOAA-CONST: %[[LOAD_PARAM:.+]] = load i8, i8 addrspace(101)*
+; CHECK-NOAA-CONST: add {{.*}}%[[LOAD_PARAM]]
+entry:
+  %param = addrspacecast i8* %param_gen to i8 addrspace(101)*
+  br label %loop
+loop:
+  %v = phi i8 [0, %entry], [%v2, %loop]
+  %load_global = load i8, i8 addrspace(1)* %global
+  store i8 %load_global, i8* %gen
+  %load_const = load i8, i8 addrspace(101)* %param
+  %v2 = add i8 %v, %load_const
+  %cond = icmp eq i8 %load_const, 0
+  br i1 %cond, label %done, label %loop
+done:
+  ret i8 %v2
+}