Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -489,6 +489,12 @@
   /// would typically be allowed using throughput or size cost models.
   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
+
+  /// Return true if the given instruction (assumed to be a memory access
+  /// instruction) has a volatile variant. If that's the case then we can avoid
+  /// addrspacecast to generic AS for volatile loads/stores.
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
+
   /// Return true if target doesn't mind addresses in vectors.
   bool prefersVectorizedAddressing() const;
 
@@ -967,6 +973,7 @@
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
@@ -1192,6 +1199,9 @@
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
+    return Impl.hasVolatileVariant(I, AddrSpace);
+  }
   bool prefersVectorizedAddressing() override {
     return Impl.prefersVectorizedAddressing();
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -255,6 +255,8 @@
   bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
 
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
+
   bool prefersVectorizedAddressing() { return true; }
 
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -180,6 +180,11 @@
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
 
+bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
+                                             unsigned AddrSpace) const {
+  return TTIImpl->hasVolatileVariant(I, AddrSpace);
+}
+
 bool TargetTransformInfo::prefersVectorizedAddressing() const {
   return TTIImpl->prefersVectorizedAddressing();
 }
Index: llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -63,6 +63,22 @@
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
 
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
+    // Volatile loads/stores are only supported for shared and global address
+    // spaces, or for generic AS that maps to them.
+    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
+          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
+          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
+      return false;
+
+    switch (I->getOpcode()) {
+    default:
+      return false;
+    case Instruction::Load:
+    case Instruction::Store:
+      return true;
+    }
+  }
 };
 
 } // end namespace llvm
Index: llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -148,10 +148,9 @@
   // Changes the flat address expressions in function F to point to specific
   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
   // all flat expressions in the use-def graph of function F.
-  bool
-  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
-                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
-                              Function *F) const;
+  bool rewriteWithNewAddressSpaces(
+      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
+      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
 
   void appendsFlatAddressExpressionToPostorderStack(
       Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
@@ -602,7 +601,7 @@
 
   // Changes the address spaces of the flat address expressions who are inferred
   // to point to a specific address space.
-  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
+  return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
 }
 
 // Constants need to be tracked through RAUW to handle cases with nested
@@ -711,22 +710,29 @@
 /// \p returns true if \p U is the pointer operand of a memory instruction with
 /// a single pointer operand that can have its address space changed by simply
 /// mutating the use to a new value.
-static bool isSimplePointerUseValidToReplace(Use &U) {
+static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
+                                             Use &U, unsigned AddrSpace) {
   User *Inst = U.getUser();
   unsigned OpNo = U.getOperandNo();
+  bool VolatileIsAllowed = false;
+  if (auto *I = dyn_cast<Instruction>(Inst))
+    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);
 
   if (auto *LI = dyn_cast<LoadInst>(Inst))
-    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+    return OpNo == LoadInst::getPointerOperandIndex() &&
+           (VolatileIsAllowed || !LI->isVolatile());
 
   if (auto *SI = dyn_cast<StoreInst>(Inst))
-    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+    return OpNo == StoreInst::getPointerOperandIndex() &&
+           (VolatileIsAllowed || !SI->isVolatile());
 
   if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
-    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
+           (VolatileIsAllowed || !RMW->isVolatile());
 
   if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
     return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
-           !CmpX->isVolatile();
+           (VolatileIsAllowed || !CmpX->isVolatile());
   }
 
   return false;
@@ -820,7 +826,7 @@
 }
 
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
-    ArrayRef<WeakTrackingVH> Postorder,
+    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
@@ -880,7 +886,8 @@
       // to the next instruction.
      I = skipToNextUser(I, E);
 
-      if (isSimplePointerUseValidToReplace(U)) {
+      if (isSimplePointerUseValidToReplace(
+              TTI, U, V->getType()->getPointerAddressSpace())) {
         // If V is used as the pointer operand of a compatible memory operation,
         // sets the pointer operand to NewV. This replacement does not change
         // the element type, so the resultant load/store is still valid.
Index: llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py
@@ -0,0 +1,81 @@
+# This test generates all variants of load/store instructions and verifies that
+# LLVM generates correct PTX for them.
+
+# RUN: python %s > %t.ll
+# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
+# RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
+
+from itertools import product
+from string import Template
+
+llvm_type_to_ptx_type = {
+    "i8": "u8",
+    "i16": "u16",
+    "i32": "u32",
+    "i64": "u64",
+    "half": "b16",
+    "<2 x half>": "b32",
+    "float": "f32",
+    "double": "f64"
+}
+
+llvm_type_to_ptx_reg = {
+    "i8": "r",
+    "i16": "r",
+    "i32": "r",
+    "i64": "rd",
+    "half": "h",
+    "<2 x half>": "hh",
+    "float": "f",
+    "double": "fd"
+}
+
+as_id = {
+    "": 0,
+    ".global": 1,
+    ".shared": 3,
+    ".const": 4,
+    ".local": 5,
+    ".param": 101
+}
+
+def get_as_id(space):
+    return as_id[space]
+
+def gen_load_tests():
+    load_template = """
+define ${type} @ld${_volatile}${_space}.${ptx_type}(${type} addrspace(${as})* %ptr) {
+; CHECK_P32: ld${_volatile}${_space}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%r{{[0-9]+}}]
+; CHECK_P64: ld${_volatile}${_space}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK: ret
+  %a = load ${volatile} ${type}, ${type} addrspace(${as})* %ptr
+  ret ${type} %a
+}
+"""
+    for op_type, volatile, space in product(
+            ["i8", "i16", "i32", "i64",
+             "half", "float", "double", "<2 x half>"],
+            [True, False],
+            ["", ".shared", ".global", ".const", ".local", ".param"]):
+
+        # Volatile loads are only supported for the generic, global and shared
+        # address spaces.
+        if volatile and space not in ["", ".global", ".shared"]:
+            continue
+
+        params = {
+            "type": op_type,
+            "volatile": "volatile" if volatile else "",
+            "_volatile": ".volatile" if volatile else "",
+            "_space": space,
+            "ptx_reg": llvm_type_to_ptx_reg[op_type],
+            "ptx_type": llvm_type_to_ptx_type[op_type],
+            "as": get_as_id(space),
+        }
+
+        print(Template(load_template).substitute(params))
+
+def main():
+    gen_load_tests()
+
+main()
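
Note (not part of the patch above): the generator currently only emits load tests, even though the header comment promises load/store coverage. A store-side counterpart could follow the same pattern. The sketch below is illustrative only; the name gen_store_tests, its template, the exact CHECK patterns, and the decision to limit stores to writable address spaces are assumptions rather than code from this change. It assumes it would live in the same file, reuse the dictionaries and helpers defined there, and be invoked from main() alongside gen_load_tests().

# Hypothetical sketch, assuming it is appended to ld-st-addrrspace.py above.
def gen_store_tests():
    # Same Template-based approach as gen_load_tests(), but the pointer is the
    # PTX address operand and the stored value is the register operand.
    store_template = """
define void @st${_volatile}${_space}.${ptx_type}(${type} addrspace(${as})* %ptr, ${type} %v) {
; CHECK_P32: st${_volatile}${_space}.${ptx_type} [%r{{[0-9]+}}], %${ptx_reg}{{[0-9]+}}
; CHECK_P64: st${_volatile}${_space}.${ptx_type} [%rd{{[0-9]+}}], %${ptx_reg}{{[0-9]+}}
; CHECK: ret
  store ${volatile} ${type} %v, ${type} addrspace(${as})* %ptr
  ret void
}
"""
    for op_type, volatile, space in product(
            ["i8", "i16", "i32", "i64",
             "half", "float", "double", "<2 x half>"],
            [True, False],
            # .const and .param are read-only, so stores are only generated for
            # generic, shared, global and local (an assumption of this sketch).
            ["", ".shared", ".global", ".local"]):

        # As with loads, volatile stores only exist for generic, global and
        # shared address spaces.
        if volatile and space not in ["", ".global", ".shared"]:
            continue

        params = {
            "type": op_type,
            "volatile": "volatile" if volatile else "",
            "_volatile": ".volatile" if volatile else "",
            "_space": space,
            "ptx_reg": llvm_type_to_ptx_reg[op_type],
            "ptx_type": llvm_type_to_ptx_type[op_type],
            "as": get_as_id(space),
        }

        print(Template(store_template).substitute(params))

With such a function, main() would simply call gen_load_tests() followed by gen_store_tests(), and the single generated %t.ll would exercise both directions of the volatile-access handling added to InferAddressSpaces.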