Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -297,6 +297,7 @@
   INTERP_MOV,
   INTERP_P1,
   INTERP_P2,
+  PC_ADD_REL_OFFSET,
   FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
   STORE_MSKOR,
   LOAD_CONSTANT,
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -2779,6 +2779,7 @@
   NODE_NAME_CASE(CVT_F32_UBYTE3)
   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
   NODE_NAME_CASE(CONST_DATA_PTR)
+  NODE_NAME_CASE(PC_ADD_REL_OFFSET)
  case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
   NODE_NAME_CASE(SENDMSG)
   NODE_NAME_CASE(INTERP_MOV)
Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -70,7 +70,10 @@
   case MachineOperand::MO_GlobalAddress: {
     const GlobalValue *GV = MO.getGlobal();
     MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
-    MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx));
+    const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
+    const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
+        MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+    MCOp = MCOperand::createExpr(Expr);
     break;
   }
   case MachineOperand::MO_ExternalSymbol: {
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -81,6 +81,7 @@
     return 2;
   case FK_SecRel_4:
   case FK_Data_4:
+  case FK_PCRel_4:
     return 4;
   case FK_SecRel_8:
   case FK_Data_8:
@@ -105,27 +106,6 @@
     break;
   }
 
-  case AMDGPU::fixup_si_rodata: {
-    uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset());
-    // We emit constant data at the end of the text section and generate its
-    // address using the following code sequence:
-    // s_getpc_b64 s[0:1]
-    // s_add_u32 s0, s0, $symbol
-    // s_addc_u32 s1, s1, 0
-    //
-    // s_getpc_b64 returns the address of the s_add_u32 instruction and then
-    // the fixup replaces $symbol with a literal constant, which is a
-    // pc-relative offset from the encoding of the $symbol operand to the
-    // constant data.
-    //
-    // What we want here is an offset from the start of the s_add_u32
-    // instruction to the constant data, but since the encoding of $symbol
-    // starts 4 bytes after the start of the add instruction, we end up
-    // with an offset that is 4 bytes too small. This requires us to
-    // add 4 to the fixup value before applying it.
-    *Dst = Value + 4;
-    break;
-  }
   default: {
     // FIXME: Copied from AArch64
     unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
@@ -152,7 +132,6 @@
   const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = {
     // name                   offset bits  flags
     { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
-    { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
   };
 
   if (Kind < FirstTargetFixupKind)
@@ -174,14 +153,14 @@
 
 namespace {
 
 class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
-  bool Is64Bit;
+  const Triple &TT;
 
 public:
-  ELFAMDGPUAsmBackend(const Target &T, bool Is64Bit) :
-    AMDGPUAsmBackend(T), Is64Bit(Is64Bit) { }
+  ELFAMDGPUAsmBackend(const Target &T, const Triple &TT) :
+    AMDGPUAsmBackend(T), TT(TT) { }
 
   MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
-    return createAMDGPUELFObjectWriter(Is64Bit, OS);
+    return createAMDGPUELFObjectWriter(TT, OS);
   }
 };
@@ -191,5 +170,5 @@
                                            const MCRegisterInfo &MRI,
                                            const Triple &TT, StringRef CPU) {
   // Use 64-bit ELF for amdgcn
-  return new ELFAMDGPUAsmBackend(T, TT.getArch() == Triple::amdgcn);
+  return new ELFAMDGPUAsmBackend(T, TT);
 }
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -18,23 +18,27 @@
 class AMDGPUELFObjectWriter : public MCELFObjectTargetWriter {
 public:
-  AMDGPUELFObjectWriter(bool Is64Bit);
+  AMDGPUELFObjectWriter(const Triple &TT);
 
 protected:
   unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
                         const MCFixup &Fixup, bool IsPCRel) const override {
     return Fixup.getKind();
   }
-
 };
 
 } // End anonymous namespace
 
-AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(bool Is64Bit)
-  : MCELFObjectTargetWriter(Is64Bit, ELF::ELFOSABI_AMDGPU_HSA,
-                            ELF::EM_AMDGPU, false) { }
+AMDGPUELFObjectWriter::AMDGPUELFObjectWriter(const Triple &TT)
+  : MCELFObjectTargetWriter(TT.getArch() == Triple::amdgcn, // Is64Bit
+                            ELF::ELFOSABI_AMDGPU_HSA,
+                            ELF::EM_AMDGPU,
+                            // HasRelocationAddend
+                            TT.getOS() == Triple::AMDHSA) {}
+
 
-MCObjectWriter *llvm::createAMDGPUELFObjectWriter(bool Is64Bit, raw_pwrite_stream &OS) {
-  MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(Is64Bit);
+MCObjectWriter *llvm::createAMDGPUELFObjectWriter(const Triple &TT,
+                                                  raw_pwrite_stream &OS) {
+  MCELFObjectTargetWriter *MOTW = new AMDGPUELFObjectWriter(TT);
   return createELFObjectWriter(MOTW, OS, true);
 }
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h
@@ -18,9 +18,6 @@
   /// 16-bit PC relative fixup for SOPP branch instructions.
   fixup_si_sopp_br = FirstTargetFixupKind,
 
-  /// fixup for global addresses with constant initializers
-  fixup_si_rodata,
-
   // Marker
   LastTargetFixupKind,
   NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -46,7 +46,7 @@
 MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
                                      const Triple &TT, StringRef CPU);
 
-MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
+MCObjectWriter *createAMDGPUELFObjectWriter(const Triple &TT,
                                             raw_pwrite_stream &OS);
 
 } // End llvm namespace
Index: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -248,14 +248,13 @@
     return MRI.getEncodingValue(MO.getReg());
 
   if (MO.isExpr()) {
-    const MCSymbolRefExpr *Expr = cast<MCSymbolRefExpr>(MO.getExpr());
-    const MCSymbol &Sym = Expr->getSymbol();
+    const MCSymbolRefExpr *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
     MCFixupKind Kind;
-    if (Sym.isExternal())
+    if (Expr && Expr->getSymbol().isExternal())
       Kind = FK_Data_4;
     else
-      Kind = (MCFixupKind)AMDGPU::fixup_si_rodata;
-    Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc()));
+      Kind = FK_PCRel_4;
+    Fixups.push_back(MCFixup::create(4, MO.getExpr(), Kind, MI.getLoc()));
   }
 
   // Figure out the operand number, needed for isSrcOperand check
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
@@ -23,7 +23,8 @@
 class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &DL,
                          SDValue Chain, unsigned Offset, bool Signed) const;
-
+  SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
+                             SelectionDAG &DAG) const override;
   SDValue lowerImplicitZextParam(SelectionDAG &DAG, SDValue Op,
                                  MVT VT, unsigned Offset) const;
 
Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1416,6 +1416,40 @@
   return DAG.getUNDEF(ASC->getValueType(0));
 }
 
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+                                             SDValue Op,
+                                             SelectionDAG &DAG) const {
+  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+  SDLoc DL(GSD);
+  const GlobalValue *GV = GSD->getGlobal();
+  MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace());
+
+  // In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode
+  // is lowered to the following code sequence:
+  // s_getpc_b64 s[0:1]
+  // s_add_u32 s0, s0, $symbol
+  // s_addc_u32 s1, s1, 0
+  //
+  // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+  // a fixup or relocation is emitted to replace $symbol with a literal
+  // constant, which is a pc-relative offset from the encoding of the $symbol
+  // operand to the global variable.
+  //
+  // What we want here is an offset from the value returned by s_getpc
+  // (which is the address of the s_add_u32 instruction) to the global
+  // variable, but since the encoding of $symbol starts 4 bytes after the start
+  // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
+  // small. This requires us to add 4 to the global variable offset in order to
+  // compute the correct address.
+  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
+                                          GSD->getOffset() + 4);
+  return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
+}
+
 SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain,
                                    const SDLoc &DL, SDValue V) const {
   // We can't use S_MOV_B32 directly, because there is no way to specify m0 as
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -914,7 +914,7 @@
     break;
   }
 
-  case AMDGPU::SI_CONSTDATA_PTR: {
+  case AMDGPU::SI_PC_ADD_REL_OFFSET: {
     const SIRegisterInfo *TRI =
         static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
     MachineFunction &MF = *MBB.getParent();
Index: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
@@ -137,6 +137,10 @@
 def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
 def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
 
+def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
+  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+>;
+
 //===----------------------------------------------------------------------===//
 // PatFrags for FLAT instructions
 //===----------------------------------------------------------------------===//
@@ -454,7 +458,7 @@
   let ParserMatchClass = SoppBrTarget;
 }
 
-def const_ga : Operand<iPTR>;
+def si_ga : Operand<iPTR>;
 
 def InterpSlot : Operand<i32> {
   let PrintMethod = "printInterpSlot";
Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
@@ -2107,10 +2107,10 @@
 
 let Defs = [SCC] in {
 
-def SI_CONSTDATA_PTR : InstSI <
+def SI_PC_ADD_REL_OFFSET : InstSI <
   (outs SReg_64:$dst),
-  (ins const_ga:$ptr),
-  "", [(set SReg_64:$dst, (i64 (AMDGPUconstdata_ptr (tglobaladdr:$ptr))))]
+  (ins si_ga:$ptr),
+  "", [(set SReg_64:$dst, (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr))))]
 > {
   let SALU = 1;
 }
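
Note (not part of the patch): the +4 bias that the old fixup_si_rodata handler and the new LowerGlobalAddress comment both describe can be sanity-checked with a small standalone C++ sketch. The addresses 0x1000/0x2000 below are made-up values for illustration only.

// pc_rel_offset_sketch.cpp -- standalone illustration of the +4 bias.
#include <cassert>
#include <cstdint>

int main() {
  // s_getpc_b64 returns the address of the instruction that follows it,
  // i.e. the s_add_u32 in the emitted sequence.
  uint64_t AddU32Addr  = 0x1000;          // assumed address of s_add_u32
  uint64_t LiteralAddr = AddU32Addr + 4;  // the 32-bit $symbol literal is
                                          // encoded 4 bytes into s_add_u32
  uint64_t GlobalAddr  = 0x2000;          // assumed address of the global

  // A 4-byte pc-relative fixup (FK_PCRel_4) on the literal resolves to
  // "target - location of the fixup"; the target carries the extra +4 that
  // LowerGlobalAddress folds into the global's offset.
  int64_t Literal = (int64_t)(GlobalAddr + 4) - (int64_t)LiteralAddr;

  // The value the hardware adds to s_getpc's result must be the distance
  // from the s_add_u32 instruction to the global; the +4 cancels the bias.
  assert(Literal == (int64_t)(GlobalAddr - AddU32Addr));
  return 0;
}

On the AMDHSA path, where the fixup survives to the object file as a relocation (the ELF writer now reports HasRelocationAddend), the same +4 should end up in the relocation addend rather than in a resolved literal.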