Index: include/llvm/CodeGen/MachineBasicBlock.h
===================================================================
--- include/llvm/CodeGen/MachineBasicBlock.h
+++ include/llvm/CodeGen/MachineBasicBlock.h
@@ -115,6 +115,10 @@
   /// branch.
   bool AddressTaken = false;
 
+  /// Indicate that this basic block needs its symbol to be emitted regardless
+  /// of whether control simply falls through to it.
+  bool LabelMustBeEmitted = false;
+
   /// Indicate that this basic block is the entry block of an EH scope, i.e.,
   /// the block that used to have a catchpad or cleanuppad instruction in the
   /// LLVM IR.
@@ -159,6 +163,13 @@
   /// branch.
   void setHasAddressTaken() { AddressTaken = true; }
 
+  /// Test whether this block must have its label emitted.
+  bool hasLabelMustBeEmitted() const { return LabelMustBeEmitted; }
+
+  /// Mark this block so that its label is emitted regardless of how control
+  /// reaches it.
+  void setLabelMustBeEmitted() { LabelMustBeEmitted = true; }
+
   /// Return the MachineFunction containing this basic block.
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
Index: lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2842,13 +2842,16 @@
 
   // Print the main label for the block.
   if (MBB.pred_empty() ||
-      (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
+      (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() &&
+       !MBB.hasLabelMustBeEmitted())) {
     if (isVerbose()) {
       // NOTE: Want this comment at start of line, don't emit with AddComment.
       OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
                                   false);
     }
   } else {
+    if (isVerbose() && MBB.hasLabelMustBeEmitted())
+      OutStreamer->AddComment("Label of block must be emitted");
     OutStreamer->EmitLabel(MBB.getSymbol());
   }
 }
Index: lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
===================================================================
--- lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -50,6 +50,7 @@
   MO_LO,
   MO_HI,
   MO_PCREL_HI,
+  MO_PCREL_LO,
 };
 } // namespace RISCVII
 
Index: lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.h
+++ lib/Target/RISCV/RISCVISelLowering.h
@@ -32,7 +32,8 @@
   SELECT_CC,
   BuildPairF64,
   SplitF64,
-  TAIL
+  TAIL,
+  WRAPPER_PIC
 };
 }
 
Index: lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.cpp
+++ lib/Target/RISCV/RISCVISelLowering.cpp
@@ -298,21 +298,32 @@
   int64_t Offset = N->getOffset();
   MVT XLenVT = Subtarget.getXLenVT();
 
-  if (isPositionIndependent() || Subtarget.is64Bit())
+  if (Subtarget.is64Bit())
     report_fatal_error("Unable to lowerGlobalAddress");
+
   // In order to maximise the opportunity for common subexpression elimination,
   // emit a separate ADD node for the global address offset instead of folding
   // it in the global address node. Later peephole optimisations may choose to
   // fold it back in when profitable.
-  SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
-  SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
-  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
-  SDValue MNLo =
-    SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
+  SDValue Addr;
+  if (isPositionIndependent()) {
+    Addr = DAG.getNode(
+        RISCVISD::WRAPPER_PIC, DL, Ty,
+        DAG.getTargetGlobalAddress(
+            GV, DL, Ty, 0,
+            Subtarget.classifyPICGlobalReference(GV, getTargetMachine())));
+  } else {
+    SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI);
+    SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO);
+    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0);
+    Addr = SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0);
+  }
+
   if (Offset != 0)
-    return DAG.getNode(ISD::ADD, DL, Ty, MNLo,
+    Addr = DAG.getNode(ISD::ADD, DL, Ty, Addr,
                        DAG.getConstant(Offset, DL, XLenVT));
-  return MNLo;
+
+  return Addr;
 }
 
 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -323,9 +334,14 @@
   const BlockAddress *BA = N->getBlockAddress();
   int64_t Offset = N->getOffset();
 
-  if (isPositionIndependent() || Subtarget.is64Bit())
+  if (Subtarget.is64Bit())
     report_fatal_error("Unable to lowerBlockAddress");
 
+  if (isPositionIndependent())
+    return DAG.getNode(
+        RISCVISD::WRAPPER_PIC, DL, Ty,
+        DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_PCREL_HI));
+
   SDValue BAHi = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_HI);
   SDValue BALo = DAG.getTargetBlockAddress(BA, Ty, Offset, RISCVII::MO_LO);
   SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, BAHi), 0);
@@ -353,7 +369,9 @@
         SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, CPALo), 0);
     return MNLo;
   } else {
-    report_fatal_error("Unable to lowerConstantPool");
+    return DAG.getNode(RISCVISD::WRAPPER_PIC, DL, Ty,
+                       DAG.getTargetConstantPool(CPA, Ty, Alignment, Offset,
+                                                 RISCVII::MO_PCREL_HI));
   }
 }
 
@@
-549,6 +567,123 @@
   return BB;
 }
 
+/// Expand a PseudoAddrPIC whose symbol operand carries MO_PCREL_HI into a
+/// PC-relative AUIPC/ADDI pair.
+///
+/// The block is split right after \p MI: the AUIPC becomes the first
+/// instruction of a new block whose label serves as the anchor for the
+/// matching %pcrel_lo relocation. Loads and stores that use the materialised
+/// address as their base register with a zero offset get %pcrel_lo folded in
+/// directly, and the ADDI is dropped once nothing else uses its result.
+///
+/// \returns the new block, which holds every instruction that followed \p MI.
+static MachineBasicBlock *emitAddressingPCrel(MachineInstr &MI,
+                                              MachineBasicBlock *BB) {
+  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  DebugLoc DL = MI.getDebugLoc();
+  MachineFunction::iterator I = ++BB->getIterator();
+
+  const MachineOperand &Rdest = MI.getOperand(0);
+  const MachineOperand &Symbol = MI.getOperand(1);
+
+  MachineFunction *F = BB->getParent();
+  MachineRegisterInfo &MRI = F->getRegInfo();
+
+  MachineBasicBlock *NewMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  // Tell AsmPrinter that we want the label of this block to be emitted
+  // unconditionally.
+  NewMBB->setLabelMustBeEmitted();
+  // This is needed to give this MBB a symbol.
+  NewMBB->setHasAddressTaken();
+
+  F->insert(I, NewMBB);
+
+  // TmpLabel:
+  //   AUIPC rtmp, %pcrel_hi(symbol)
+  unsigned Rtmp = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+  BuildMI(NewMBB, DL, TII.get(RISCV::AUIPC), Rtmp).add(Symbol);
+  //   ADDI rdest, rtmp, %pcrel_lo(TmpLabel)
+  MachineInstr *ADDI = BuildMI(NewMBB, DL, TII.get(RISCV::ADDI), Rdest.getReg())
+                           .addReg(Rtmp)
+                           .addMBB(NewMBB, RISCVII::MO_PCREL_LO);
+
+  // Move the instructions that followed MI into NewMBB.
+  NewMBB->splice(NewMBB->end(), BB, std::next(MachineBasicBlock::iterator(MI)),
+                 BB->end());
+  // Update machine-CFG edges.
+  NewMBB->transferSuccessorsAndUpdatePHIs(BB);
+  // Make the original basic block fall through to the new one.
+  BB->addSuccessor(NewMBB);
+
+  // Fold the offset into loads/stores so we can save the ADDI above.
+  SmallPtrSet<MachineInstr *, 4> FoldOffsetInstrs;
+  for (auto &UsesMI : MRI.use_nodbg_instructions(Rdest.getReg())) {
+    switch (UsesMI.getOpcode()) {
+    default:
+      continue;
+    case RISCV::LB:
+    case RISCV::LH:
+    case RISCV::LW:
+    case RISCV::LBU:
+    case RISCV::LHU:
+    case RISCV::LWU:
+    case RISCV::LD:
+    case RISCV::FLW:
+    case RISCV::FLD:
+    case RISCV::SB:
+    case RISCV::SH:
+    case RISCV::SW:
+    case RISCV::SD:
+    case RISCV::FSW:
+    case RISCV::FSD:
+      break;
+    }
+
+    LLVM_DEBUG(UsesMI.dump());
+
+    // Only fold when the address is the base register (operand 1) and the
+    // offset is still a zero immediate. A store may also use the address as
+    // the value being stored (operand 0); that use still needs the ADDI.
+    const MachineOperand &Base = UsesMI.getOperand(1);
+    const MachineOperand &Off = UsesMI.getOperand(2);
+    if (Base.isReg() && Base.getReg() == Rdest.getReg() && Off.isImm() &&
+        Off.getImm() == 0)
+      FoldOffsetInstrs.insert(&UsesMI);
+  }
+
+  for (MachineInstr *FoldMI : FoldOffsetInstrs) {
+    LLVM_DEBUG(dbgs() << "Folding offset of PC-rel into load/store");
+    LLVM_DEBUG(FoldMI->dump());
+    FoldMI->getOperand(1).setReg(Rtmp);
+    // There is no changeTo for MBB yet.
+    FoldMI->RemoveOperand(2);
+    FoldMI->addOperand(MachineOperand::CreateMBB(NewMBB, RISCVII::MO_PCREL_LO));
+  }
+
+  // We may now be able to remove the ADDI.
+  if (MRI.use_nodbg_empty(Rdest.getReg()))
+    ADDI->eraseFromParent();
+
+  MI.eraseFromParent(); // The pseudo instruction is gone now.
+  return NewMBB;
+}
+
+/// Expand PseudoAddrPIC according to the target flags on its address operand.
+/// Only MO_PCREL_HI (PC-relative addressing) is handled here.
+static MachineBasicBlock *emitAddressingPIC(MachineInstr &MI,
+                                            MachineBasicBlock *BB) {
+  const MachineOperand &GlobalAddrOp = MI.getOperand(1);
+  unsigned char TargetFlags = GlobalAddrOp.getTargetFlags();
+
+  switch (TargetFlags) {
+  default:
+    llvm_unreachable("Unexpected target flags");
+  case RISCVII::MO_PCREL_HI:
+    return emitAddressingPCrel(MI, BB);
+  }
+}
+
 MachineBasicBlock *
 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                  MachineBasicBlock *BB) const {
@@ -563,6 +698,8 @@
     return emitBuildPairF64Pseudo(MI, BB);
   case RISCV::SplitF64Pseudo:
     return emitSplitF64Pseudo(MI, BB);
+  case RISCV::PseudoAddrPIC:
+    return emitAddressingPIC(MI, BB);
   }
 
   // To "insert" a SELECT instruction, we actually have to insert the triangle
@@ -1577,6 +1714,8 @@
     return "RISCVISD::SplitF64";
   case RISCVISD::TAIL:
     return "RISCVISD::TAIL";
+  case RISCVISD::WRAPPER_PIC:
+    return "RISCVISD::WRAPPER_PIC";
   }
   return nullptr;
 }
Index: lib/Target/RISCV/RISCVInstrInfo.td
===================================================================
--- lib/Target/RISCV/RISCVInstrInfo.td
+++ lib/Target/RISCV/RISCVInstrInfo.td
@@ -47,6 +47,7 @@
 def Tail         : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPVariadic]>;
+def WrapperPIC   : SDNode<"RISCVISD::WRAPPER_PIC", SDTIntUnaryOp>;
 
 //===----------------------------------------------------------------------===//
 // Operand and SDNode transformation definitions.
@@ -764,6 +765,18 @@
 def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
                        "lla", "$dst, $src">;
 
+/// Materialises the address of a symbol for position-independent code. It is
+/// expanded by the custom inserter (EmitInstrWithCustomInserter).
+let usesCustomInserter = 1 in
+def PseudoAddrPIC : Pseudo<(outs GPR:$dst), (ins ixlenimm:$addr),
+                           [(set GPR:$dst, (WrapperPIC tglobaladdr:$addr))]>;
+
+def : Pat<(WrapperPIC tblockaddress:$addr),
+          (PseudoAddrPIC tblockaddress:$addr)>;
+
+def : Pat<(WrapperPIC tconstpool:$addr),
+          (PseudoAddrPIC tconstpool:$addr)>;
+
 /// Loads
 
 multiclass LdPat<PatFrag LoadOp, RVInst Inst> {
Index: lib/Target/RISCV/RISCVMCInstLower.cpp
===================================================================
--- lib/Target/RISCV/RISCVMCInstLower.cpp
+++ lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -43,6 +43,12 @@
   case RISCVII::MO_HI:
     Kind = RISCVMCExpr::VK_RISCV_HI;
     break;
+  case RISCVII::MO_PCREL_LO:
+    Kind = RISCVMCExpr::VK_RISCV_PCREL_LO;
+    break;
+  case RISCVII::MO_PCREL_HI:
+    Kind = RISCVMCExpr::VK_RISCV_PCREL_HI;
+    break;
   }
 
   const MCExpr *ME =
Index: lib/Target/RISCV/RISCVSubtarget.h
===================================================================
--- lib/Target/RISCV/RISCVSubtarget.h
+++ lib/Target/RISCV/RISCVSubtarget.h
@@ -81,6 +81,11 @@
   bool enableLinkerRelax() const { return EnableLinkerRelax; }
   MVT getXLenVT() const { return XLenVT; }
   unsigned getXLen() const { return XLen; }
+
+  /// Classify a reference to \p GV in PIC mode, returning the RISCVII target
+  /// flag to put on the symbol operand. Only DSO-local globals are supported.
+  unsigned char classifyPICGlobalReference(const GlobalValue *GV,
+                                           const TargetMachine &TM) const;
 };
 } // End llvm namespace
 
Index: lib/Target/RISCV/RISCVSubtarget.cpp
===================================================================
--- lib/Target/RISCV/RISCVSubtarget.cpp
+++ lib/Target/RISCV/RISCVSubtarget.cpp
@@ -46,3 +46,13 @@
     : RISCVGenSubtargetInfo(TT, CPU, FS),
       FrameLowering(initializeSubtargetDependencies(CPU, FS, TT.isArch64Bit())),
       InstrInfo(), RegInfo(getHwMode()), TLInfo(TM, *this) {}
+
+unsigned char
+RISCVSubtarget::classifyPICGlobalReference(const GlobalValue *GV,
+                                           const TargetMachine &TM) const {
+  if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
+    return RISCVII::MO_PCREL_HI;
+
+  // GOT access not implemented yet.
+  report_fatal_error("PIC global reference not supported");
+}
Index: test/CodeGen/RISCV/blockaddr-pic.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/blockaddr-pic.ll
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple riscv32 -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefix=RV32-NOPIC
+; RUN: llc -mtriple riscv32 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefix=RV32-PIC
+
+define dso_local signext i32 @foo(i32 signext %w) nounwind {
+
+; RV32-NOPIC-LABEL: foo:
+; RV32-NOPIC:    lui a1, %hi(.Ltmp0)
+; RV32-NOPIC:    addi a1, a1, %lo(.Ltmp0)
+; RV32-NOPIC:    sw a1, 8(sp)
+; RV32-NOPIC:    addi a1, zero, 101
+; RV32-NOPIC:    lw a0, 8(sp)
+; RV32-NOPIC:    jr a0
+; RV32-NOPIC:  .Ltmp0: # Block address taken
+;
+; RV32-PIC-LABEL: foo:
+; RV32-PIC:  .LBB0_1: # Block address taken
+; RV32-PIC:    # %entry
+; RV32-PIC:    # Label of block must be emitted
+; RV32-PIC:    auipc a1, %pcrel_hi(.Ltmp0)
+; RV32-PIC:    addi a1, a1, %pcrel_lo(.LBB0_1)
+; RV32-PIC:    sw a1, 8(sp)
+; RV32-PIC:    addi a1, zero, 101
+; RV32-PIC:    lw a0, 8(sp)
+; RV32-PIC:    jr a0
+; RV32-PIC:  .Ltmp0: # Block address taken
+
+entry:
+  %x = alloca i8*, align 8
+  store i8* blockaddress(@foo, %test_block), i8** %x, align 8
+  %cmp = icmp sgt i32 %w, 100
+  br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+  %addr = load i8*, i8** %x, align 8
+  br label %indirectgoto
+
+if.end:
+  br label %return
+
+test_block:
+  br label %return
+
+return:
+  %retval = phi i32 [ 3, %if.end ], [ 4, %test_block ]
+  ret i32 %retval
+
+indirectgoto:
+  %indirect.goto.dest = phi i8* [ %addr, %if.then ]
+  indirectbr i8* %addr, [ label %test_block ]
+}
+
Index: test/CodeGen/RISCV/const-pool.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/const-pool.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefix=RV32-NOPIC
+; RUN: llc -mtriple riscv32 -mattr=+f -relocation-model=pic \
+; RUN:     -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefix=RV32-PIC
+
+@gf = internal global float undef
+
+define void @test() nounwind {
+; RV32-NOPIC-LABEL: test:
+; RV32-NOPIC:    lui a0, %hi(.LCPI0_0)
+; RV32-NOPIC:    addi a0, a0, %lo(.LCPI0_0)
+; RV32-NOPIC:    flw ft0, 0(a0)
+
+; RV32-PIC-LABEL: test:
+; RV32-PIC:  .LBB0_1: # Block address taken
+; RV32-PIC:    # %entry
+; RV32-PIC:    # Label of block must be emitted
+; RV32-PIC:    auipc a0, %pcrel_hi(.LCPI0_0)
+; RV32-PIC:    flw ft0, %pcrel_lo(.LBB0_1)(a0)
+entry:
+  %f = load float, float* @gf
+  %f2 = fadd float %f, 0x3FEB333340000000
+  store float %f2, float* @gf
+  ret void
+}
Index: test/CodeGen/RISCV/global-addr-pic.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/global-addr-pic.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv32 -relocation-model=pic -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s --check-prefix=RV32
+
+@local = internal global i32 42, align 4
+
+@local8 = internal global i8 42, align 1
+@local16 = internal global i16 42, align 2
+
+@addr = internal global i32* zeroinitializer
+
+; Fold pcrel_lo into the load; no addi is needed.
+define signext i32 @get_local() nounwind {
+; RV32-LABEL: get_local:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB0_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a0, %pcrel_hi(local)
+; RV32-NEXT:    lw a0, %pcrel_lo(.LBB0_1)(a0)
+; RV32-NEXT:    ret
+entry:
+  %0 = load i32, i32* @local, align 4
+  ret i32 %0
+}
+
+; Fold pcrel_lo into the store; no addi is needed.
+define void @set_local(i32 signext %a) nounwind {
+; RV32-LABEL: set_local:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB1_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a1, %pcrel_hi(local)
+; RV32-NEXT:    sw a0, %pcrel_lo(.LBB1_1)(a1)
+; RV32-NEXT:    ret
+entry:
+  store i32 %a, i32* @local, align 4
+  ret void
+}
+
+; Fold pcrel_lo into both the load and the store; no addi used in addressing.
+define void @update_local() nounwind {
+; RV32-LABEL: update_local:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB2_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a0, %pcrel_hi(local)
+; RV32-NEXT:    lw a1, %pcrel_lo(.LBB2_1)(a0)
+; RV32-NEXT:    addi a1, a1, 1
+; RV32-NEXT:    sw a1, %pcrel_lo(.LBB2_1)(a0)
+; RV32-NEXT:    ret
+entry:
+  %0 = load i32, i32* @local, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @local, align 4
+  ret void
+}
+
+; Subword (i8) fold; no addi used in addressing.
+define void @update_local8() nounwind {
+; RV32-LABEL: update_local8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB3_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a0, %pcrel_hi(local8)
+; RV32-NEXT:    lb a1, %pcrel_lo(.LBB3_1)(a0)
+; RV32-NEXT:    addi a1, a1, 1
+; RV32-NEXT:    sb a1, %pcrel_lo(.LBB3_1)(a0)
+; RV32-NEXT:    ret
+entry:
+  %0 = load i8, i8* @local8, align 1
+  %inc = add nsw i8 %0, 1
+  store i8 %inc, i8* @local8, align 1
+  ret void
+}
+
+; Subword (i16) fold; no addi used in addressing.
+define void @update_local16() nounwind {
+; RV32-LABEL: update_local16:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB4_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a0, %pcrel_hi(local16)
+; RV32-NEXT:    lh a1, %pcrel_lo(.LBB4_1)(a0)
+; RV32-NEXT:    addi a1, a1, 1
+; RV32-NEXT:    sh a1, %pcrel_lo(.LBB4_1)(a0)
+; RV32-NEXT:    ret
+entry:
+  %0 = load i16, i16* @local16, align 2
+  %inc = add nsw i16 %0, 1
+  store i16 %inc, i16* @local16, align 2
+  ret void
+}
+
+; The address itself is returned, so the addi must remain.
+define i32* @get_local_ptr() nounwind {
+; RV32-LABEL: get_local_ptr:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB5_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a0, %pcrel_hi(local)
+; RV32-NEXT:    addi a0, a0, %pcrel_lo(.LBB5_1)
+; RV32-NEXT:    ret
+entry:
+  ret i32* @local
+}
+
+; Fold the load and both stores, but keep the addi for 'local': its address is
+; also the value stored to 'addr'.
+define void @update_and_store_local_address() nounwind {
+; RV32-LABEL: update_and_store_local_address:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:  .LBB6_1: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a0, %pcrel_hi(addr)
+; RV32-NEXT:  .LBB6_2: # Block address taken
+; RV32-NEXT:    # %entry
+; RV32-NEXT:    # Label of block must be emitted
+; RV32-NEXT:    auipc a1, %pcrel_hi(local)
+; RV32-NEXT:    addi a2, a1, %pcrel_lo(.LBB6_2)
+; RV32-NEXT:    sw a2, %pcrel_lo(.LBB6_1)(a0)
+; RV32-NEXT:    lw a0, %pcrel_lo(.LBB6_2)(a1)
+; RV32-NEXT:    addi a0, a0, 1
+; RV32-NEXT:    sw a0, %pcrel_lo(.LBB6_2)(a1)
+; RV32-NEXT:    ret
+entry:
+  %0 = load i32, i32* @local, align 4
+  %inc = add nsw i32 %0, 1
+  store i32 %inc, i32* @local, align 4
+  store i32* @local, i32** @addr, align 4
+  ret void
+}