Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
@@ -489,6 +489,13 @@
   /// would typically be allowed using throughput or size cost models.
   bool hasDivRemOp(Type *DataType, bool IsSigned) const;
 
+  /// Return true if the given instruction (assumed to be a memory access
+  /// instruction) has a volatile variant. If that's the case then we can avoid
+  /// addrspacecast to generic AS for volatile loads/stores. Default
+  /// implementation returns false, which prevents address space inference for
+  /// volatile loads/stores.
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
+
   /// Return true if target doesn't mind addresses in vectors.
   bool prefersVectorizedAddressing() const;
 
@@ -967,6 +974,7 @@
   virtual bool isLegalMaskedScatter(Type *DataType) = 0;
   virtual bool isLegalMaskedGather(Type *DataType) = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
+  virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
@@ -1192,6 +1200,9 @@
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
+    return Impl.hasVolatileVariant(I, AddrSpace);
+  }
   bool prefersVectorizedAddressing() override {
     return Impl.prefersVectorizedAddressing();
   }
Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -255,6 +255,8 @@
 
   bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
 
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
+
   bool prefersVectorizedAddressing() { return true; }
 
   int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
@@ -180,6 +180,11 @@
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
 
+bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
+                                             unsigned AddrSpace) const {
+  return TTIImpl->hasVolatileVariant(I, AddrSpace);
+}
+
 bool TargetTransformInfo::prefersVectorizedAddressing() const {
   return TTIImpl->prefersVectorizedAddressing();
 }
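To illustrate what the new hook enables: a minimal before/after sketch in LLVM IR, assuming a target whose hasVolatileVariant returns true for a load in addrspace(1), as the NVPTX implementation below does. The function name is only illustrative; the IR shape mirrors what the new test generates.

; Input: a volatile load through a generic pointer produced by an
; addrspacecast from the global address space.
define i32 @volatile_ld_example(i32 addrspace(1)* %ptr) {
  %p = addrspacecast i32 addrspace(1)* %ptr to i32*
  %v = load volatile i32, i32* %p
  ret i32 %v
}

; After InferAddressSpaces, when TTI.hasVolatileVariant(load, 1) is true, the
; volatile load uses the addrspace(1) pointer directly and the cast goes away:
define i32 @volatile_ld_example(i32 addrspace(1)* %ptr) {
  %v = load volatile i32, i32 addrspace(1)* %ptr
  ret i32 %v
}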
Index: llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
===================================================================
--- llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ llvm/trunk/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -63,6 +63,22 @@
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
+  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) {
+    // Volatile loads/stores are only supported for shared and global address
+    // spaces, or for generic AS that maps to them.
+    if (!(AddrSpace == llvm::ADDRESS_SPACE_GENERIC ||
+          AddrSpace == llvm::ADDRESS_SPACE_GLOBAL ||
+          AddrSpace == llvm::ADDRESS_SPACE_SHARED))
+      return false;
+
+    switch (I->getOpcode()) {
+    default:
+      return false;
+    case Instruction::Load:
+    case Instruction::Store:
+      return true;
+    }
+  }
 };
 
 } // end namespace llvm
Index: llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ llvm/trunk/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -148,10 +148,9 @@
   // Changes the flat address expressions in function F to point to specific
   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
   // all flat expressions in the use-def graph of function F.
-  bool
-  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,
-                              const ValueToAddrSpaceMapTy &InferredAddrSpace,
-                              Function *F) const;
+  bool rewriteWithNewAddressSpaces(
+      const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
+      const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const;
 
   void appendsFlatAddressExpressionToPostorderStack(
     Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,
@@ -602,7 +601,7 @@
 
   // Changes the address spaces of the flat address expressions who are inferred
   // to point to a specific address space.
-  return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
+  return rewriteWithNewAddressSpaces(TTI, Postorder, InferredAddrSpace, &F);
 }
 
 // Constants need to be tracked through RAUW to handle cases with nested
@@ -710,23 +709,32 @@
 
 /// \p returns true if \p U is the pointer operand of a memory instruction with
 /// a single pointer operand that can have its address space changed by simply
-/// mutating the use to a new value.
-static bool isSimplePointerUseValidToReplace(Use &U) {
+/// mutating the use to a new value. If the memory instruction is volatile,
+/// return true only if the target allows the memory instruction to be volatile
+/// in the new address space.
+static bool isSimplePointerUseValidToReplace(const TargetTransformInfo &TTI,
+                                             Use &U, unsigned AddrSpace) {
   User *Inst = U.getUser();
   unsigned OpNo = U.getOperandNo();
+  bool VolatileIsAllowed = false;
+  if (auto *I = dyn_cast<Instruction>(Inst))
+    VolatileIsAllowed = TTI.hasVolatileVariant(I, AddrSpace);
 
   if (auto *LI = dyn_cast<LoadInst>(Inst))
-    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+    return OpNo == LoadInst::getPointerOperandIndex() &&
+           (VolatileIsAllowed || !LI->isVolatile());
 
   if (auto *SI = dyn_cast<StoreInst>(Inst))
-    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+    return OpNo == StoreInst::getPointerOperandIndex() &&
+           (VolatileIsAllowed || !SI->isVolatile());
 
   if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
-    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+    return OpNo == AtomicRMWInst::getPointerOperandIndex() &&
+           (VolatileIsAllowed || !RMW->isVolatile());
 
   if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
     return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
-           !CmpX->isVolatile();
+           (VolatileIsAllowed || !CmpX->isVolatile());
   }
 
   return false;
@@ -820,7 +828,7 @@
 }
 
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
-    ArrayRef<WeakTrackingVH> Postorder,
+    const TargetTransformInfo &TTI, ArrayRef<WeakTrackingVH> Postorder,
     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
   // For each address expression to be modified, creates a clone of it with its
   // pointer operands converted to the new address space. Since the pointer
@@ -880,7 +888,8 @@
       // to the next instruction.
       I = skipToNextUser(I, E);
 
-      if (isSimplePointerUseValidToReplace(U)) {
+      if (isSimplePointerUseValidToReplace(
+              TTI, U, V->getType()->getPointerAddressSpace())) {
        // If V is used as the pointer operand of a compatible memory operation,
        // sets the pointer operand to NewV. This replacement does not change
        // the element type, so the resultant load/store is still valid.
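The guard in isSimplePointerUseValidToReplace also means volatile accesses stay in the generic address space whenever the target does not report a volatile variant. A sketch, again in LLVM IR with an illustrative function name: the NVPTX hook above returns false for anything other than a plain load or store, so the pointer operand of this volatile atomicrmw is not rewritten and the operation keeps using a generic pointer. Targets that use the default TTI implementation keep the same conservative behavior for volatile loads and stores as well.

define i32 @volatile_rmw_example(i32 addrspace(1)* %ptr) {
  %p = addrspacecast i32 addrspace(1)* %ptr to i32*
  %old = atomicrmw volatile add i32* %p, i32 1 seq_cst
  ret i32 %old
}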
Index: llvm/trunk/test/CodeGen/NVPTX/ld-st-addrrspace.py
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/ld-st-addrrspace.py
+++ llvm/trunk/test/CodeGen/NVPTX/ld-st-addrrspace.py
@@ -0,0 +1,97 @@
+# This test generates all variants of load/store instructions and verifies that
+# LLVM generates correct PTX for them.
+
+# RUN: python %s > %t.ll
+# RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P64 %t.ll
+# RUN: llc < %t.ll -march=nvptx -mcpu=sm_30 | FileCheck -check-prefixes=CHECK,CHECK_P32 %t.ll
+
+from itertools import product
+from string import Template
+
+llvm_type_to_ptx_type = {
+    "i8": "u8",
+    "i16": "u16",
+    "i32": "u32",
+    "i64": "u64",
+    "half": "b16",
+    "<2 x half>": "b32",
+    "float": "f32",
+    "double": "f64"
+}
+
+llvm_type_to_ptx_reg = {
+    "i8": "r",
+    "i16": "r",
+    "i32": "r",
+    "i64": "rd",
+    "half": "h",
+    "<2 x half>": "hh",
+    "float": "f",
+    "double": "fd"
+}
+
+addrspace_id = {
+    "": 0,
+    ".global": 1,
+    ".shared": 3,
+    ".const": 4,
+    ".local": 5,
+    ".param": 101
+}
+
+
+def gen_load_tests():
+  load_template = """
+define ${type} @ld${_volatile}${_space}.${ptx_type}(${type} addrspace(${asid})* %ptr) {
+; CHECK_P32: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%r{{[0-9]+}}]
+; CHECK_P64: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%rd{{[0-9]+}}]
+; CHECK: ret
+  %p = ${generic_ptr}
+  %a = load ${volatile} ${type}, ${type}* %p
+  ret ${type} %a
+}
+"""
+  for op_type, volatile, space in product(
+      ["i8", "i16", "i32", "i64", "half", "float", "double", "<2 x half>"],
+      [True, False],  # volatile
+      ["", ".shared", ".global", ".const", ".local", ".param"]):

+
+    # Volatile is only supported for global, shared and generic.
+    if volatile and not space in ["", ".global", ".shared"]:
+      continue
+
+    # Volatile is only supported for global, shared and generic.
+    # All other volatile accesses are done in generic AS.
+    if volatile and not space in ["", ".global", ".shared"]:
+      volatile_as = ""
+    else:
+      volatile_as = space
+
+    params = {
+        "type": op_type,
+        "volatile": "volatile" if volatile else "",
+        "_volatile": ".volatile" if volatile else "",
+        "_volatile_as": volatile_as,
+        "_space": space,
+        "ptx_reg": llvm_type_to_ptx_reg[op_type],
+        "ptx_type": llvm_type_to_ptx_type[op_type],
+        "asid": addrspace_id[space],
+    }
+
+    # LLVM does not accept "addrspacecast Type* addrspace(0) to Type*", so we
+    # need to avoid it for generic pointer tests.
+    if space:
+      generic_ptr_template = ("addrspacecast ${type} addrspace(${asid})* %ptr "
+                              "to ${type}*")
+    else:
+      generic_ptr_template = "select i1 true, ${type}* %ptr, ${type}* %ptr"
+    params["generic_ptr"] = Template(generic_ptr_template).substitute(params)
+
+    print(Template(load_template).substitute(params))
+
+
+def main():
+  gen_load_tests()
+
+
+main()
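For reference, this is roughly the function the script prints for one of the combinations (a volatile i32 load from the global address space), obtained by substituting the template by hand: the addrspacecast feeds a volatile load, and the CHECK lines expect llc to fold the access back into ld.volatile.global thanks to the changes above.

define i32 @ld.volatile.global.u32(i32 addrspace(1)* %ptr) {
; CHECK_P32: ld.volatile.global.u32 %r{{[0-9]+}}, [%r{{[0-9]+}}]
; CHECK_P64: ld.volatile.global.u32 %r{{[0-9]+}}, [%rd{{[0-9]+}}]
; CHECK: ret
  %p = addrspacecast i32 addrspace(1)* %ptr to i32*
  %a = load volatile i32, i32* %p
  ret i32 %a
}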