Index: llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -3657,6 +3657,40 @@
       return true;
     }
   }
+
+  // Handle addrspacecast nodes
+  if (AddrSpaceCastSDNode *ASCN = dyn_cast<AddrSpaceCastSDNode>(Addr)) {
+    // Check if there's an add behind the addrspacecast
+    SDNode *ADDN = ASCN->getOperand(0).getNode();
+
+    // Only fold when the cast preserves the value type: the new add is created
+    // with the old add's type but consumes the new cast as its first operand.
+    if (ADDN && ADDN->getOpcode() == ISD::ADD &&
+        ADDN->getValueType(0) == ASCN->getValueType(0)) {
+      // Yes, so create new addrspacecast and add nodes.
+      // We transform addrspacecast(add(x, y)) to add(addrspacecast(x), y).
+      SDValue NewASCN = CurDAG->getAddrSpaceCast(
+          SDLoc(ASCN), ASCN->getValueType(0), ADDN->getOperand(0),
+          ASCN->getSrcAddressSpace(), ASCN->getDestAddressSpace());
+      SDValue NewADDN =
+          CurDAG->getNode(ISD::ADD, SDLoc(ADDN), ADDN->getValueType(0),
+                          NewASCN, ADDN->getOperand(1));
+
+      // Replace the old addrspacecast by the new add, effectively swapping the
+      // order of the addrspacecast and add.
+      ReplaceUses(Addr, NewADDN);
+
+      // Instruction selection is not called for this new addrspacecast node, so
+      // call it manually.
+      SelectAddrSpaceCast(cast<AddrSpaceCastSDNode>(NewASCN));
+
+      // Finally, run instruction selection for the address on these new nodes.
+      // This will trigger the add case above, and move the addition to the
+      // addressing mode for the memory operation.
+      return SelectADDRri_imp(OpNode, NewADDN, Base, Offset, mvt);
+    }
+  }
+
   return false;
 }
 
Index: llvm/test/CodeGen/NVPTX/addrspace-offsets.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/NVPTX/addrspace-offsets.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define void @addrspacecast_offset(i64) {
+  %ptr = inttoptr i64 %0 to i16*
+
+; CHECK: st.global.u16 [%rd{{[0-9]+}}], %rs{{[0-9]+}}
+  %gep1 = getelementptr i16, i16* %ptr, i64 0
+  %asc1 = addrspacecast i16* %gep1 to i16 addrspace(1)*
+  store i16 0, i16 addrspace(1)* %asc1, align 16
+
+; CHECK: st.global.u16 [%rd{{[0-9]+}}+32], %rs{{[0-9]+}}
+  %gep2 = getelementptr i16, i16* %ptr, i64 16
+  %asc2 = addrspacecast i16* %gep2 to i16 addrspace(1)*
+  store i16 0, i16 addrspace(1)* %asc2, align 16
+
+; CHECK: st.global.u16 [%rd{{[0-9]+}}+64], %rs{{[0-9]+}}
+  %gep3 = getelementptr i16, i16* %ptr, i64 32
+  %asc3 = addrspacecast i16* %gep3 to i16 addrspace(1)*
+  store i16 0, i16 addrspace(1)* %asc3, align 16
+
+; CHECK: st.global.u16 [%rd{{[0-9]+}}+96], %rs{{[0-9]+}}
+  %gep4 = getelementptr i16, i16* %ptr, i64 48
+  %asc4 = addrspacecast i16* %gep4 to i16 addrspace(1)*
+  store i16 0, i16 addrspace(1)* %asc4, align 16
+
+  ret void
+}