Index: lib/Target/AMDGPU/AMDGPUMCInstLower.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -16,6 +16,7 @@ #include "AMDGPUSubtarget.h" #include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUInstPrinter.h" +#include "MCTargetDesc/AMDGPUMCExpr.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "R600AsmPrinter.h" #include "SIInstrInfo.h" @@ -148,8 +149,12 @@ MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName); const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx); - const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr, - MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + const MCExpr *Expr = MCBinaryExpr::createAdd( + SymExpr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + + if (MO.getTargetFlags() == SIInstrInfo::MO_PCREL32_HI) + Expr = AMDGPUMCExpr::create(AMDGPUMCExpr::VK_AMDGPU_PCREL_HI32, Expr, Ctx); + MCOp = MCOperand::createExpr(Expr); return true; } Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -70,6 +70,7 @@ case FK_SecRel_4: case FK_Data_4: case FK_PCRel_4: + case AMDGPU::fixup_si_pcrel_hi32: return 4; case FK_SecRel_8: case FK_Data_8: @@ -92,6 +93,9 @@ return BrImm; } + case AMDGPU::fixup_si_pcrel_hi32: { + return Value >> 32; + } case FK_Data_1: case FK_Data_2: case FK_Data_4: @@ -133,6 +137,7 @@ const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = { // name offset bits flags { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_si_pcrel_hi32", 0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp =================================================================== --- 
lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUMCTargetDesc.h" +#include "AMDGPUFixupKinds.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" @@ -69,9 +70,10 @@ return ELF::R_AMDGPU_REL64; } - switch (Fixup.getKind()) { + switch ((unsigned)Fixup.getKind()) { default: break; case FK_PCRel_4: + case AMDGPU::fixup_si_pcrel_hi32: return ELF::R_AMDGPU_REL32; case FK_Data_4: case FK_SecRel_4: Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h =================================================================== --- lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h @@ -17,6 +17,9 @@ /// 16-bit PC relative fixup for SOPP branch instructions. fixup_si_sopp_br = FirstTargetFixupKind, + // Extract the high 32-bits of a 64-bit PC relative value. + fixup_si_pcrel_hi32, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h =================================================================== --- /dev/null +++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.h @@ -0,0 +1,63 @@ +//===-- AMDGPUMCExpr.h - AMDGPU specific MC expression classes --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+namespace llvm {
+/// Target expression that tags a sub-expression with an AMDGPU variant kind.
+class AMDGPUMCExpr : public MCTargetExpr {
+public:
+  enum VariantKind {
+    VK_AMDGPU_None,
+    VK_AMDGPU_PCREL_HI32
+  };
+
+protected:
+  explicit AMDGPUMCExpr(VariantKind Kind, const MCExpr *Expr)
+      : Kind(Kind), Expr(Expr) {}
+
+  ~AMDGPUMCExpr() = default;
+
+private:
+  const VariantKind Kind;
+  const MCExpr *Expr;
+
+public:
+  static const AMDGPUMCExpr *create(VariantKind Kind, const MCExpr *Expr,
+                                    MCContext &Ctx);
+
+  // Returns the variant kind this expression was created with.
+  VariantKind getKind() const { return Kind; }
+
+  // Returns the wrapped sub-expression the variant kind applies to.
+  const MCExpr *getSubExpr() const { return Expr; }
+
+  void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+  void visitUsedExpr(MCStreamer &Streamer) const override;
+
+  bool evaluateAsRelocatableImpl(MCValue &Res,
+                                 const MCAsmLayout *Layout,
+                                 const MCFixup *Fixup) const override;
+
+  MCFragment *findAssociatedFragment() const override {
+    llvm_unreachable("not implemented");
+  }
+
+  void fixELFSymbolsInTLSFixups(MCAssembler &) const override {
+    // No TLS symbols to handle.
+  }
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCEXPR_H
Index: lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
===================================================================
--- /dev/null
+++ lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCExpr.cpp
@@ -0,0 +1,55 @@
+//===-- AMDGPUMCExpr.cpp - AMDGPU specific MC expression classes ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+using namespace llvm;
+
+
+const AMDGPUMCExpr *AMDGPUMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+                                         MCContext &Ctx) {
+  return new (Ctx) AMDGPUMCExpr(Kind, Expr);
+}
+
+void AMDGPUMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  if (Kind == VK_AMDGPU_None) {
+    Expr->print(OS, MAI);
+    return;
+  }
+
+  switch (Kind) {
+  case VK_AMDGPU_PCREL_HI32:
+    OS << "hi32";
+    break;
+  default:
+    llvm_unreachable("Invalid kind!");
+  }
+
+  OS << '(';
+  // Print the wrapped operand between the parentheses.
+  getSubExpr()->print(OS, MAI);
+  OS << ')';
+}
+
+void AMDGPUMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+  Streamer.visitUsedExpr(*getSubExpr());
+}
+
+bool AMDGPUMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+                                             const MCAsmLayout *Layout,
+                                             const MCFixup *Fixup) const {
+  if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
+    return false;
+
+  Res =
+      MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+
+  return true;
+}
Index: lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
+++ lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@
   AMDGPUInstPrinter.cpp
   AMDGPUMCAsmInfo.cpp
   AMDGPUMCCodeEmitter.cpp
+  AMDGPUMCExpr.cpp
   AMDGPUMCTargetDesc.cpp
   AMDGPUTargetStreamer.cpp
   R600MCCodeEmitter.cpp
Index: lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
===================================================================
--- lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -15,6 +15,7 @@
 #include "AMDGPU.h"
 #include "MCTargetDesc/AMDGPUFixupKinds.h"
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUMCExpr.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
 #include "Utils/AMDGPUBaseInfo.h"
@@ -397,19 +398,24 @@
   return RegEnc;
 }

-static bool needsPCRel(const MCExpr *Expr) {
+static bool needsPCRel(const MCExpr *Expr, bool &IsHi) {
   switch (Expr->getKind()) {
   case MCExpr::SymbolRef:
     return true;
   case MCExpr::Binary: {
     auto *BE = cast<MCBinaryExpr>(Expr);
     if (BE->getOpcode() == MCBinaryExpr::Sub)
       return false;
-    return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
+    return needsPCRel(BE->getLHS(), IsHi) || needsPCRel(BE->getRHS(), IsHi);
   }
   case MCExpr::Unary:
-    return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
-  case MCExpr::Target:
+    return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr(), IsHi);
+  case MCExpr::Target: {
+    assert(cast<AMDGPUMCExpr>(Expr)->getKind() ==
+           AMDGPUMCExpr::VK_AMDGPU_PCREL_HI32);
+    IsHi = true;
+    return true;
+  }
   case MCExpr::Constant:
     return false;
   }
@@ -435,9 +441,11 @@
     //   .Ltmp1:
     //   s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
     MCFixupKind Kind;
-    if (needsPCRel(MO.getExpr()))
-      Kind = FK_PCRel_4;
+    bool IsHi = false;
+
+    if (needsPCRel(MO.getExpr(), IsHi))
+      Kind = IsHi ? (MCFixupKind)AMDGPU::fixup_si_pcrel_hi32 : FK_PCRel_4;
     else
       Kind = FK_Data_4;

     const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4043,6 +4043,9 @@
 }

 bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
+  if (GV->getValueType()->isFunctionTy() && GV->hasLocalLinkage())
+    return true;
+
   const Triple &TT = getTargetMachine().getTargetTriple();
   return (GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
           GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
@@ -4624,13 +4627,14 @@

   // FIXME: Should not make address space based decisions here.
if (shouldEmitFixup(GV))
-    return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
+    return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
+                                   SIInstrInfo::MO_PCREL32_LO);
   else if (shouldEmitPCReloc(GV))
     return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
-                                   SIInstrInfo::MO_REL32);
+                                   SIInstrInfo::MO_REL32_LO);

   SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
-                                            SIInstrInfo::MO_GOTPCREL32);
+                                            SIInstrInfo::MO_GOTPCREL32_LO);

   Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
   PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -141,26 +141,35 @@
                                        unsigned OpIdx0,
                                        unsigned OpIdx1) const override;

 public:
   enum TargetOperandFlags {
-    MO_MASK = 0x7,
+    MO_MASK = 0xf,

     MO_NONE = 0,
+
+    // MO_PCREL32_LO/HI -> low/high half of a 64-bit PC relative fixup.
+    MO_PCREL32_LO = 1,
+    MO_PCREL32_HI = 2,
+
     // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
-    MO_GOTPCREL = 1,
+    MO_GOTPCREL = 3,
+
     // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
-    MO_GOTPCREL32 = 2,
-    MO_GOTPCREL32_LO = 2,
+    MO_GOTPCREL32 = 4,
+    MO_GOTPCREL32_LO = 4,
+
     // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
-    MO_GOTPCREL32_HI = 3,
+    MO_GOTPCREL32_HI = 5,
+
     // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
-    MO_REL32 = 4,
-    MO_REL32_LO = 4,
+    MO_REL32 = 6,
+    MO_REL32_LO = 6,
+
     // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
-    MO_REL32_HI = 5,
+    MO_REL32_HI = 7,

-    MO_LONG_BRANCH_FORWARD = 6,
-    MO_LONG_BRANCH_BACKWARD = 7
+    MO_LONG_BRANCH_FORWARD = 8,
+    MO_LONG_BRANCH_BACKWARD = 9
   };

   explicit SIInstrInfo(const GCNSubtarget &ST);
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1316,10 +1316,7 @@

     MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
                                   .addReg(RegHi);
-    if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
-      MIB.addImm(0);
-    else
-      MIB.add(MI.getOperand(2));
+    MIB.add(MI.getOperand(2));

     Bundler.append(MIB);
     finalizeBundle(MBB, Bundler.begin());
Index: test/CodeGen/AMDGPU/function-call-relocs.ll
===================================================================
--- test/CodeGen/AMDGPU/function-call-relocs.ll
+++ test/CodeGen/AMDGPU/function-call-relocs.ll
@@ -1,37 +1,62 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefix=ASM %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj < %s | llvm-objdump -triple amdgcn--amdhsa -mcpu=fiji -d - | FileCheck -enable-var-scope -check-prefix=DIS %s

 declare void @func(i32 addrspace(1)* %out)

-declare protected void @protected_func(i32 addrspace(1)* %out)
+declare protected void @protected_func(i32 addrspace(1)*)

-declare hidden void @hidden_func(i32 addrspace(1)* %out)
+declare hidden void @hidden_func(i32 addrspace(1)*)

-; CHECK-LABEL: call_func:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], func@gotpcrel32@lo+4
-; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], func@gotpcrel32@hi+4
-; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0
-; CHECK: s_swappc_b64
s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} +; DIS: 0000000000000000 internal_func: +define internal void @internal_func(i32 addrspace(1)* %out) { + ret void +} + +; DIS: 0000000000000100 call_internal_func: + +; DIS: s_getpc_b64 s[8:9] // 000000000120: BE881C00 +; DIS-NEXT: s_add_u32 s8, s8, 0xfffffedc // 000000000124: 8008FF08 FFFFFEDC +; DIS-NEXT: s_addc_u32 s9, s9, -1 // 00000000012C: 8209FF09 FFFFFFFF + +; ASM-LABEL: call_internal_func: +; ASM: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; ASM: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal_func+4 ; encoding: [0x08,0xff,0x08,0x80,A,A,A,A] +; ASM-NEXT: ; fixup A - offset: 4, value: internal_func+4, kind: FK_PCRel_4 +; ASM: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hi32(internal_func+4) ; encoding: [0x09,0xff,0x09,0x82,A,A,A,A] +; ASM-NEXT: ; fixup A - offset: 4, value: hi32(internal_func+4), kind: fixup_si_pcrel_hi32 + +; ASM: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} +define amdgpu_kernel void @call_internal_func(i32 addrspace(1)* %out) { + call void @internal_func(i32 addrspace(1)* %out) + ret void +} + +; ASM-LABEL: call_func: +; ASM: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; ASM: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], func@gotpcrel32@lo+4 +; ASM: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], func@gotpcrel32@hi+4 +; ASM: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0 +; ASM: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} define amdgpu_kernel void @call_func(i32 addrspace(1)* %out) { call void @func(i32 addrspace(1)* %out) ret void } -; CHECK-LABEL: call_protected_func: -; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], protected_func@rel32@lo+4 -; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 
protected_func@rel32@hi+4 -; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} +; ASM-LABEL: call_protected_func: +; ASM: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; ASM: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], protected_func@rel32@lo+4 +; ASM: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], protected_func@rel32@hi+4 +; ASM: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} define amdgpu_kernel void @call_protected_func(i32 addrspace(1)* %out) { call void @protected_func(i32 addrspace(1)* %out) ret void } -; CHECK-LABEL: call_hidden_func: -; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], hidden_func@rel32@lo+4 -; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hidden_func@rel32@hi+4 -; CHECK: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} +; ASM-LABEL: call_hidden_func: +; ASM: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; ASM: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], hidden_func@rel32@lo+4 +; ASM: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], hidden_func@rel32@hi+4 +; ASM: s_swappc_b64 s{{\[}}{{[0-9]+:[0-9]+}}{{\]}}, s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} define amdgpu_kernel void @call_hidden_func(i32 addrspace(1)* %out) { call void @hidden_func(i32 addrspace(1)* %out) ret void @@ -39,12 +64,12 @@ declare i64 @funci() -; CHECK-LABEL: tail_call_func: -; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} -; CHECK: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], funci@gotpcrel32@lo+4 -; CHECK: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], funci@gotpcrel32@hi+4 -; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0 -; CHECK: s_setpc_b64 s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} +; ASM-LABEL: tail_call_func: +; ASM: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}} +; 
ASM: s_add_u32 s[[GOT_ADDR_LO:[0-9]+]], s[[PC_LO]], funci@gotpcrel32@lo+4 +; ASM: s_addc_u32 s[[GOT_ADDR_HI:[0-9]+]], s[[PC_HI]], funci@gotpcrel32@hi+4 +; ASM: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOT_ADDR_LO]]:[[GOT_ADDR_HI]]{{\]}}, 0x0 +; ASM: s_setpc_b64 s{{\[}}[[ADDR_LO]]:[[ADDR_HI]]{{\]}} define i64 @tail_call_func() { %ret = tail call i64 @funci() ret i64 %ret Index: test/CodeGen/AMDGPU/global-constant.ll =================================================================== --- test/CodeGen/AMDGPU/global-constant.ll +++ test/CodeGen/AMDGPU/global-constant.ll @@ -9,8 +9,8 @@ ; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} ; Non-HSA OSes use fixup into .text section. -; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1 -; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 +; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1+4{{$}} +; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], hi32(private1+4){{$}} ; HSA OSes use relocations. ; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 @@ -19,8 +19,8 @@ ; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} ; Non-HSA OSes use fixup into .text section. -; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2 -; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 +; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2+4{{$}} +; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], hi32(private2+4){{$}} ; HSA OSes use relocations. ; HSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4