diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -179,29 +179,82 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const { - // Encode (imm, reg) as a memri34, which has the low 34-bits as the - // displacement and the next 5 bits as an immediate 0. + // Encode the PCRelative version of memri34: imm34(r0). + // In the PC relative version the register for the address must be zero. + // The 34 bit immediate can fall into one of three cases: + // 1) It is a relocation to be filled in by the linker represented as: + // (MCExpr::SymbolRef) + // 2) It is a relocation + SignedOffset represented as: + // (MCExpr::Binary(MCExpr::SymbolRef + MCExpr::Constant)) + // 3) It is a known value at compile time. + + // Make sure that the register is a zero as expected. assert(MI.getOperand(OpNo + 1).isImm() && "Expecting an immediate."); uint64_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 34; + assert(RegBits == 0 && "Operand must be 0."); - if (RegBits != 0) - report_fatal_error("Operand must be 0"); - + // If this is not a MCExpr then we are in case 3) and we are dealing with + // a value known at compile time, not a relocation. const MCOperand &MO = MI.getOperand(OpNo); - if (MO.isExpr()) { - const MCExpr *Expr = MO.getExpr(); + if (!MO.isExpr()) + return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits; + + // At this point in the function it is known that MO is of type MCExpr. + // Therefore we are dealing with either case 1) a symbol ref or + // case 2) a symbol ref plus a constant. 
+ const MCExpr *Expr = MO.getExpr(); + switch (Expr->getKind()) { + default: + llvm_unreachable("Unsupported MCExpr for getMemRI34PCRelEncoding."); + case MCExpr::SymbolRef: { + // Relocation alone. const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr); (void)SRE; + // Currently these are the only valid PCRelative Relocations. assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL || SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) && "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL"); + // Generate the fixup for the relocation. Fixups.push_back( MCFixup::create(IsLittleEndian ? 0 : 1, Expr, static_cast<MCFixupKind>(PPC::fixup_ppc_pcrel34))); + // There is no offset to return so just return 0. return 0; } - return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits; + case MCExpr::Binary: { + // Relocation plus some offset. + const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr); + assert(BE->getOpcode() == MCBinaryExpr::Add && + "Binary expression opcode must be an add."); + + const MCExpr *LHS = BE->getLHS(); + const MCExpr *RHS = BE->getRHS(); + + // Need to check in both directions. Reloc+Offset and Offset+Reloc. + if (LHS->getKind() != MCExpr::SymbolRef) + std::swap(LHS, RHS); + + if (LHS->getKind() != MCExpr::SymbolRef || + RHS->getKind() != MCExpr::Constant) + llvm_unreachable("Expecting to have one constant and one relocation."); + + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(LHS); + const MCConstantExpr *CE = cast<MCConstantExpr>(RHS); + + // Currently these are the only valid PCRelative Relocations. + assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL || + SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) && + "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL"); + // Generate the fixup for the relocation. + Fixups.push_back( + MCFixup::create(IsLittleEndian ? 0 : 1, Expr, + static_cast<MCFixupKind>(PPC::fixup_ppc_pcrel34))); + assert(isInt<34>(CE->getValue()) && "Value must fit in 34 bits."); + // Return the offset that should be added to the relocation by the linker. 
+ return (CE->getValue() & 0x3FFFFFFFFUL) | RegBits; + } + } } uint64_t diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -15972,10 +15972,59 @@ return SDValue(); } +// Transform +// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to +// (MAT_PCREL_ADDR GlobalAddr+(C1+C2)) +// In this case both C1 and C2 must be known constants. +// C1+C2 must fit into a 34 bit signed integer. +static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, + const PPCSubtarget &Subtarget) { + if (!Subtarget.isUsingPCRelativeCalls()) + return SDValue(); + + // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node. + // If we find that node try to cast the Global Address and the Constant. + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR) + std::swap(LHS, RHS); + + if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR) + return SDValue(); + + // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node. + GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0)); + ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS); + + // Check that both casts succeeded. + if (!GSDN || !ConstNode) + return SDValue(); + + int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue(); + SDLoc DL(GSDN); + + // The signed int offset needs to fit in 34 bits. + if (!isInt<34>(NewOffset)) + return SDValue(); + + // The new global address is a copy of the old global address except + // that it has the updated Offset. 
+ SDValue GA = + DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0), + NewOffset, GSDN->getTargetFlags()); + SDValue MatPCRel = + DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA); + return MatPCRel; +} + SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const { if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget)) return Value; + if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget)) + return Value; + return SDValue(); } diff --git a/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll b/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll --- a/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll +++ b/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll @@ -131,9 +131,8 @@ define ppc_fp128 @_Z23ReadStaticLongDoubleVarv() { ; CHECK-LABEL: _Z23ReadStaticLongDoubleVarv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL19StaticLongDoubleVar@PCREL, 1 -; CHECK-NEXT: lfd f2, 8(r3) ; CHECK-NEXT: plfd f1, _ZL19StaticLongDoubleVar@PCREL(0), 1 +; CHECK-NEXT: plfd f2, _ZL19StaticLongDoubleVar@PCREL+8(0), 1 ; CHECK-NEXT: blr entry: %0 = load ppc_fp128, ppc_fp128* @_ZL19StaticLongDoubleVar, align 16 @@ -144,9 +143,8 @@ define i128 @_Z27ReadStaticSigned__Int128Varv() { ; CHECK-LABEL: _Z27ReadStaticSigned__Int128Varv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL23StaticSigned__Int128Var@PCREL, 1 -; CHECK-NEXT: ld r4, 8(r3) ; CHECK-NEXT: pld r3, _ZL23StaticSigned__Int128Var@PCREL(0), 1 +; CHECK-NEXT: pld r4, _ZL23StaticSigned__Int128Var@PCREL+8(0), 1 ; CHECK-NEXT: blr entry: %0 = load i128, i128* @_ZL23StaticSigned__Int128Var, align 16 @@ -340,8 +338,7 @@ define void @_Z24WriteStaticLongDoubleVarg(ppc_fp128 %val) { ; CHECK-LABEL: _Z24WriteStaticLongDoubleVarg: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL19StaticLongDoubleVar@PCREL, 1 -; CHECK-NEXT: stfd f2, 8(r3) +; CHECK-NEXT: pstfd f2, 
_ZL19StaticLongDoubleVar@PCREL+8(0), 1 ; CHECK-NEXT: pstfd f1, _ZL19StaticLongDoubleVar@PCREL(0), 1 ; CHECK-NEXT: blr entry: @@ -353,8 +350,7 @@ define void @_Z28WriteStaticSigned__Int128Varn(i128 %val) { ; CHECK-LABEL: _Z28WriteStaticSigned__Int128Varn: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r5, 0, _ZL23StaticSigned__Int128Var@PCREL, 1 -; CHECK-NEXT: std r4, 8(r5) +; CHECK-NEXT: pstd r4, _ZL23StaticSigned__Int128Var@PCREL+8(0), 1 ; CHECK-NEXT: pstd r3, _ZL23StaticSigned__Int128Var@PCREL(0), 1 ; CHECK-NEXT: blr entry: @@ -490,8 +486,7 @@ define signext i32 @_Z15ReadStaticArrayv() { ; CHECK-LABEL: _Z15ReadStaticArrayv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL5array@PCREL, 1 -; CHECK-NEXT: lwa r3, 12(r3) +; CHECK-NEXT: plwa r3, _ZL5array@PCREL+12(0), 1 ; CHECK-NEXT: blr entry: %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @_ZL5array, i64 0, i64 3), align 4 @@ -502,9 +497,8 @@ define void @_Z16WriteStaticArrayv() { ; CHECK-LABEL: _Z16WriteStaticArrayv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL5array@PCREL, 1 -; CHECK-NEXT: li r4, 5 -; CHECK-NEXT: stw r4, 12(r3) +; CHECK-NEXT: li r3, 5 +; CHECK-NEXT: pstw r3, _ZL5array@PCREL+12(0), 1 ; CHECK-NEXT: blr entry: store i32 5, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @_ZL5array, i64 0, i64 3), align 4 @@ -518,8 +512,7 @@ define signext i32 @_Z16ReadStaticStructv() { ; CHECK-LABEL: _Z16ReadStaticStructv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL9structure@PCREL, 1 -; CHECK-NEXT: lwa r3, 4(r3) +; CHECK-NEXT: plwa r3, _ZL9structure@PCREL+4(0), 1 ; CHECK-NEXT: blr entry: %0 = load i32, i32* getelementptr inbounds (%struct.Struct, %struct.Struct* @_ZL9structure, i64 0, i32 2), align 4 @@ -530,9 +523,8 @@ define void @_Z17WriteStaticStructv() { ; CHECK-LABEL: _Z17WriteStaticStructv: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: paddi r3, 0, _ZL9structure@PCREL, 1 -; CHECK-NEXT: li r4, 3 -; CHECK-NEXT: stw r4, 4(r3) +; CHECK-NEXT: li r3, 3 +; 
CHECK-NEXT: pstw r3, _ZL9structure@PCREL+4(0), 1 ; CHECK-NEXT: blr entry: store i32 3, i32* getelementptr inbounds (%struct.Struct, %struct.Struct* @_ZL9structure, i64 0, i32 2), align 4 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-relocation-plus-offset.ll @@ -0,0 +1,73 @@ +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-S +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: --filetype=obj < %s | \ +; RUN: llvm-objdump --mcpu=future -dr - | FileCheck %s --check-prefix=CHECK-O + + +@array1 = external local_unnamed_addr global [10 x i32], align 4 +@array2 = common dso_local local_unnamed_addr global [10 x i32] zeroinitializer, align 4 + +define dso_local signext i32 @getElementLocal7() local_unnamed_addr { +; CHECK-S-LABEL: getElementLocal7: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: plwa r3, array2@PCREL+28(0), 1 +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <getElementLocal7>: +; CHECK-O: 00 00 10 04 1c 00 60 a4 plwa 3, 28(0), 1 +; CHECK-O-NEXT: 0000000000000000: R_PPC64_PCREL34 array2+0x1c +; CHECK-O-NEXT: 20 00 80 4e blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array2, i64 0, i64 7), align 4 + ret i32 %0 +} + +define dso_local signext i32 @getElementLocalNegative() local_unnamed_addr { +; CHECK-S-LABEL: getElementLocalNegative: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: plwa r3, array2@PCREL-8(0), 1 +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <getElementLocalNegative>: +; CHECK-O: ff ff 13 04 f8 ff 60 a4 plwa 3, -8(0), 1 +; CHECK-O-NEXT: 0000000000000020: R_PPC64_PCREL34 array2-0x8 +; CHECK-O-NEXT: 20 00 80 4e blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], 
[10 x i32]* @array2, i64 0, i64 -2), align 4 + ret i32 %0 +} + +define dso_local signext i32 @getElementExtern4() local_unnamed_addr { +; CHECK-S-LABEL: getElementExtern4: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 +; CHECK-S-NEXT: lwa r3, 16(r3) +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <getElementExtern4>: +; CHECK-O: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 +; CHECK-O-NEXT: 0000000000000040: R_PPC64_GOT_PCREL34 array1 +; CHECK-O-NEXT: 12 00 63 e8 lwa 3, 16(3) +; CHECK-O-NEXT: 20 00 80 4e blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 4), align 4 + ret i32 %0 +} + +define dso_local signext i32 @getElementExternNegative() local_unnamed_addr { +; CHECK-S-LABEL: getElementExternNegative: +; CHECK-S: # %bb.0: # %entry +; CHECK-S-NEXT: pld r3, array1@got@pcrel(0), 1 +; CHECK-S-NEXT: lwa r3, -4(r3) +; CHECK-S-NEXT: blr +; CHECK-O-LABEL: <getElementExternNegative>: +; CHECK-O: 00 00 10 04 00 00 60 e4 pld 3, 0(0), 1 +; CHECK-O-NEXT: 0000000000000060: R_PPC64_GOT_PCREL34 array1 +; CHECK-O-NEXT: fe ff 63 e8 lwa 3, -4(3) +; CHECK-O-NEXT: 20 00 80 4e blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array1, i64 0, i64 -1), align 4 + ret i32 %0 +} + +