diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp RISCVCodeGenPrepare.cpp + RISCVCleanupLocalDynamicTLSPass.cpp RISCVMakeCompressible.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -76,6 +76,9 @@ RISCVSubtarget &, RISCVRegisterBankInfo &); void initializeRISCVDAGToDAGISelPass(PassRegistry &); + +void initializeRISCVLDTLSCleanupPass(PassRegistry &); +FunctionPass *createRISCVCleanupLocalDynamicTLSPass(); } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVCleanupLocalDynamicTLSPass.cpp b/llvm/lib/Target/RISCV/RISCVCleanupLocalDynamicTLSPass.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVCleanupLocalDynamicTLSPass.cpp @@ -0,0 +1,150 @@ +//===-- RISCVCleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Local-dynamic access to thread-local variables proceeds in three stages. +// +// 1. The offset of this Module's thread-local area from the thread pointer (tp) +// is calculated in much the same way as a general-dynamic TLS-descriptor +// access against the special symbol _TLS_MODULE_BASE_. +// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated. +// 3. These two are added, together with tp, to obtain the variable's +// true address. 
+//
+// This is only better than general-dynamic access to the variable if two or
+// more of the first stage TLS-descriptor calculations can be combined. This
+// pass looks through a function and performs such combinations.
+//
+//===----------------------------------------------------------------------===//
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+#define TLSCLEANUP_PASS_NAME "RISCV Local Dynamic TLS Access Clean-up"
+
+namespace {
+struct RISCVLDTLSCleanup : public MachineFunctionPass {
+  static char ID;
+  RISCVLDTLSCleanup() : MachineFunctionPass(ID) {
+    initializeRISCVLDTLSCleanupPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Share a single _TLS_MODULE_BASE_ TLS-descriptor computation between the
+  // local-dynamic accesses of MF. Returns true if MF was modified.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+
+    // No point folding accesses if there aren't at least two.
+    RISCVMachineFunctionInfo *MFI = MF.getInfo<RISCVMachineFunctionInfo>();
+    if (MFI->getNumLocalDynamicTLSAccesses() < 2)
+      return false;
+
+    MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
+    return VisitNode(DT->getRootNode(), Register());
+  }
+
+  // Visit the dominator subtree rooted at Node in pre-order.
+  // If TLSBaseAddrReg is valid, use it to replace every _TLS_MODULE_BASE_
+  // PseudoLA_TLSDESC encountered. Otherwise, create the register when the
+  // first such instruction is seen, and use it for the ones that follow.
+  // TLSBaseAddrReg is taken by value, so a register created in this subtree
+  // is only handed to blocks that the defining block dominates.
+  bool VisitNode(MachineDomTreeNode *Node, Register TLSBaseAddrReg) {
+    MachineBasicBlock *BB = Node->getBlock();
+    bool Changed = false;
+
+    // Traverse the current block.
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+         ++I) {
+      if (I->getOpcode() != RISCV::PseudoLA_TLSDESC)
+        continue;
+
+      // Make sure it's a local dynamic access.
+      const MachineOperand &Sym = I->getOperand(1);
+      if (!Sym.isSymbol() ||
+          StringRef(Sym.getSymbolName()) != "_TLS_MODULE_BASE_")
+        continue;
+
+      // Both helpers return the COPY they inserted; continuing from it skips
+      // the new instruction and never touches an erased pseudo.
+      if (TLSBaseAddrReg.isValid())
+        I = replaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
+      else
+        I = setRegister(*I, &TLSBaseAddrReg);
+      Changed = true;
+    }
+
+    // Visit the children of this block in the dominator tree.
+    for (MachineDomTreeNode *N : *Node)
+      Changed |= VisitNode(N, TLSBaseAddrReg);
+
+    return Changed;
+  }
+
+  // Replace the _TLS_MODULE_BASE_ PseudoLA_TLSDESC instruction I with a copy
+  // from TLSBaseAddrReg, returning the new instruction.
+  MachineInstr *replaceTLSBaseAddrCall(MachineInstr &I,
+                                       Register TLSBaseAddrReg) {
+    MachineFunction *MF = I.getParent()->getParent();
+    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+    // Insert a COPY from TLSBaseAddrReg into the pseudo's result register,
+    // which is where the rest of the code sequence expects the value to be.
+    MachineInstr *Copy =
+        BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                TII->get(TargetOpcode::COPY), I.getOperand(0).getReg())
+            .addReg(TLSBaseAddrReg);
+
+    // Update the call site info.
+    if (I.shouldUpdateCallSiteInfo())
+      MF->eraseCallSiteInfo(&I);
+
+    // Erase the now-redundant pseudo instruction.
+    I.eraseFromParent();
+
+    return Copy;
+  }
+
+  // Create a virtual GPR in *TLSBaseAddrReg, and populate it by inserting a
+  // copy of I's result right after I. Returns the new instruction.
+  MachineInstr *setRegister(MachineInstr &I, Register *TLSBaseAddrReg) {
+    MachineFunction *MF = I.getParent()->getParent();
+    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+    MachineRegisterInfo &RegInfo = MF->getRegInfo();
+    *TLSBaseAddrReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+    MachineInstr *Copy =
+        BuildMI(*I.getParent(), ++I.getIterator(), I.getDebugLoc(),
+                TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+            .addReg(I.getOperand(0).getReg());
+
+    return Copy;
+  }
+
+  StringRef getPassName() const override { return TLSCLEANUP_PASS_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>(); // VisitNode walks the dom tree.
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+INITIALIZE_PASS(RISCVLDTLSCleanup, "riscv-local-dynamic-tls-cleanup",
+                TLSCLEANUP_PASS_NAME, false, false)
+
+char RISCVLDTLSCleanup::ID = 0;
+FunctionPass *llvm::createRISCVCleanupLocalDynamicTLSPass() {
+  return new RISCVLDTLSCleanup();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -73,7 +73,7 @@
 
 cl::opt<bool> EnableRISCVTLSDESC("riscv-enable-tlsdesc",
                                  cl::desc("Enable the tlsdesc for RISC-V"),
-                                 cl::init(false), cl::Hidden);
+                                 cl::init(true), cl::Hidden);
 
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
@@ -312,6 +312,11 @@
 bool RISCVPassConfig::addInstSelector() {
   addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel()));
 
+  // For ELF, cleanup any local-dynamic TLS accesses.
+ if (EnableRISCVTLSDESC && TM->getTargetTriple().isOSBinFormatELF() && + getOptLevel() != CodeGenOpt::None) + addPass(createRISCVCleanupLocalDynamicTLSPass()); + return false; } diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -81,6 +81,8 @@ ; CHECK-NEXT: Lazy Branch Probability Analysis ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection +; CHECK-NEXT: MachineDominator Tree Construction +; CHECK-NEXT: RISCV Local Dynamic TLS Access Clean-up ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication diff --git a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll --- a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll +++ b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll @@ -143,56 +143,38 @@ define void @test_la_tls_gd(i32 signext %n) nounwind { ; RV32I-LABEL: test_la_tls_gd: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s2, 0(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: li s2, 0 -; RV32I-NEXT: .Lpcrel_hi3: -; RV32I-NEXT: auipc a0, %tls_gd_pcrel_hi(gd) -; RV32I-NEXT: addi s1, a0, %pcrel_lo(.Lpcrel_hi3) +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: li a2, 0 ; RV32I-NEXT: .LBB3_1: # %loop ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a0, s1 -; RV32I-NEXT: call __tls_get_addr@plt +; RV32I-NEXT: .Ltlsdesc_hi0: +; RV32I-NEXT: auipc a0, %tlsdesc_hi(gd) +; RV32I-NEXT: lw a3, %tlsdesc_lo(.Ltlsdesc_hi0)(a0) +; RV32I-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0) +; RV32I-NEXT: jalr t0, 0(a3), 
%tlsdesc_call(.Ltlsdesc_hi0) +; RV32I-NEXT: add a0, a0, tp ; RV32I-NEXT: lw a0, 0(a0) -; RV32I-NEXT: addi s2, s2, 1 -; RV32I-NEXT: blt s2, s0, .LBB3_1 +; RV32I-NEXT: addi a2, a2, 1 +; RV32I-NEXT: blt a2, a1, .LBB3_1 ; RV32I-NEXT: # %bb.2: # %ret -; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s2, 0(sp) # 4-byte Folded Reload -; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_la_tls_gd: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addi sp, sp, -32 -; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: li s2, 0 -; RV64I-NEXT: .Lpcrel_hi3: -; RV64I-NEXT: auipc a0, %tls_gd_pcrel_hi(gd) -; RV64I-NEXT: addi s1, a0, %pcrel_lo(.Lpcrel_hi3) +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: li a2, 0 ; RV64I-NEXT: .LBB3_1: # %loop ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a0, s1 -; RV64I-NEXT: call __tls_get_addr@plt +; RV64I-NEXT: .Ltlsdesc_hi0: +; RV64I-NEXT: auipc a0, %tlsdesc_hi(gd) +; RV64I-NEXT: ld a3, %tlsdesc_lo(.Ltlsdesc_hi0)(a0) +; RV64I-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0) +; RV64I-NEXT: jalr t0, 0(a3), %tlsdesc_call(.Ltlsdesc_hi0) +; RV64I-NEXT: add a0, a0, tp ; RV64I-NEXT: lw a0, 0(a0) -; RV64I-NEXT: addiw s2, s2, 1 -; RV64I-NEXT: blt s2, s0, .LBB3_1 +; RV64I-NEXT: addiw a2, a2, 1 +; RV64I-NEXT: blt a2, a1, .LBB3_1 ; RV64I-NEXT: # %bb.2: # %ret -; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 32 ; RV64I-NEXT: ret entry: br label %loop diff --git a/llvm/test/CodeGen/RISCV/mir-target-flags.ll 
b/llvm/test/CodeGen/RISCV/mir-target-flags.ll --- a/llvm/test/CodeGen/RISCV/mir-target-flags.ll +++ b/llvm/test/CodeGen/RISCV/mir-target-flags.ll @@ -42,14 +42,16 @@ ; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) ; RV32-MED: target-flags(riscv-pcrel-hi) @g_i ; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) -; RV32-MED: target-flags(riscv-tls-gd-hi) @t_un -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) -; RV32-MED: target-flags(riscv-plt) &__tls_get_addr -; RV32-MED: target-flags(riscv-tls-gd-hi) @t_ld -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) -; RV32-MED: target-flags(riscv-plt) &__tls_get_addr +; RV32-MED: target-flags(riscv-tlsdesc-hi) @t_un +; RV32-MED-NEXT: target-flags(riscv-tlsdesc-load-lo) +; RV32-MED-NEXT: target-flags(riscv-tlsdesc-add-lo) +; RV32-MED: target-flags(riscv-tlsdesc-call) +; RV32-MED: target-flags(riscv-tlsdesc-hi) &_TLS_MODULE_BASE_ +; RV32-MED-NEXT: target-flags(riscv-tlsdesc-load-lo) +; RV32-MED: target-flags(riscv-tlsdesc-add-lo) +; RV32-MED: target-flags(riscv-tlsdesc-call) ; RV32-MED: target-flags(riscv-tls-got-hi) @t_ie -; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) +; RV32-MED-NEXT: target-flags(riscv-pcrel-lo) ; RV32-MED: target-flags(riscv-tprel-hi) @t_le ; RV32-MED-NEXT: target-flags(riscv-tprel-add) @t_le ; RV32-MED-NEXT: target-flags(riscv-tprel-lo) @t_le diff --git a/llvm/test/CodeGen/RISCV/tls-models.ll b/llvm/test/CodeGen/RISCV/tls-models.ll --- a/llvm/test/CodeGen/RISCV/tls-models.ll +++ b/llvm/test/CodeGen/RISCV/tls-models.ll @@ -6,20 +6,14 @@ ; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=RV32-NOPIC %s ; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64-NOPIC %s -; RUN: llc -mtriple=riscv32 -relocation-model=pic -riscv-enable-tlsdesc < %s \ -; RUN: | FileCheck -check-prefix=RV32-PIC-TLSDESC %s -; RUN: llc -mtriple=riscv64 -relocation-model=pic -riscv-enable-tlsdesc < %s \ -; RUN: | FileCheck -check-prefix=RV64-PIC-TLSDESC %s -; RUN: llc -mtriple=riscv32 < %s -riscv-enable-tlsdesc | FileCheck 
-check-prefix=RV32-NOPIC-TLSDESC %s -; RUN: llc -mtriple=riscv64 < %s -riscv-enable-tlsdesc | FileCheck -check-prefix=RV64-NOPIC-TLSDESC %s - - ; Check that TLS symbols are lowered correctly based on the specified ; model. Make sure they're external to avoid them all being optimised to Local ; Exec for the executable. @unspecified = external thread_local global i32 @ld = external thread_local(localdynamic) global i32 +@ld2 = external thread_local(localdynamic) global i32 +@ld3 = external thread_local(localdynamic) global i32 @ie = external thread_local(initialexec) global i32 @le = external thread_local(localexec) global i32 @@ -29,26 +23,22 @@ define ptr @f1() nounwind { ; RV32-PIC-LABEL: f1: ; RV32-PIC: # %bb.0: # %entry -; RV32-PIC-NEXT: addi sp, sp, -16 -; RV32-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-PIC-NEXT: .Lpcrel_hi0: -; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(unspecified) -; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi0) -; RV32-PIC-NEXT: call __tls_get_addr@plt -; RV32-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-PIC-NEXT: addi sp, sp, 16 +; RV32-PIC-NEXT: .Ltlsdesc_hi0: +; RV32-PIC-NEXT: auipc a0, %tlsdesc_hi(unspecified) +; RV32-PIC-NEXT: lw a1, %tlsdesc_lo(.Ltlsdesc_hi0)(a0) +; RV32-PIC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0) +; RV32-PIC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0) +; RV32-PIC-NEXT: add a0, a0, tp ; RV32-PIC-NEXT: ret ; ; RV64-PIC-LABEL: f1: ; RV64-PIC: # %bb.0: # %entry -; RV64-PIC-NEXT: addi sp, sp, -16 -; RV64-PIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-PIC-NEXT: .Lpcrel_hi0: -; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(unspecified) -; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi0) -; RV64-PIC-NEXT: call __tls_get_addr@plt -; RV64-PIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-PIC-NEXT: addi sp, sp, 16 +; RV64-PIC-NEXT: .Ltlsdesc_hi0: +; RV64-PIC-NEXT: auipc a0, %tlsdesc_hi(unspecified) +; RV64-PIC-NEXT: ld a1, %tlsdesc_lo(.Ltlsdesc_hi0)(a0) +; RV64-PIC-NEXT: addi 
a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0) +; RV64-PIC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0) +; RV64-PIC-NEXT: add a0, a0, tp ; RV64-PIC-NEXT: ret ; ; RV32-NOPIC-LABEL: f1: @@ -66,7 +56,6 @@ ; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0) ; RV64-NOPIC-NEXT: add a0, a0, tp ; RV64-NOPIC-NEXT: ret -; ; RV32-PIC-TLSDESC-LABEL: f1: ; RV32-PIC-TLSDESC: # %bb.0: # %entry ; RV32-PIC-TLSDESC-NEXT: .Ltlsdesc_hi0: @@ -76,7 +65,6 @@ ; RV32-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0) ; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp ; RV32-PIC-TLSDESC-NEXT: ret -; ; RV64-PIC-TLSDESC-LABEL: f1: ; RV64-PIC-TLSDESC: # %bb.0: # %entry ; RV64-PIC-TLSDESC-NEXT: .Ltlsdesc_hi0: @@ -86,7 +74,6 @@ ; RV64-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0) ; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp ; RV64-PIC-TLSDESC-NEXT: ret -; ; RV32-NOPIC-TLSDESC-LABEL: f1: ; RV32-NOPIC-TLSDESC: # %bb.0: # %entry ; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi0: @@ -94,7 +81,6 @@ ; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi0)(a0) ; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp ; RV32-NOPIC-TLSDESC-NEXT: ret -; ; RV64-NOPIC-TLSDESC-LABEL: f1: ; RV64-NOPIC-TLSDESC: # %bb.0: # %entry ; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi0: @@ -112,26 +98,28 @@ define ptr @f2() nounwind { ; RV32-PIC-LABEL: f2: ; RV32-PIC: # %bb.0: # %entry -; RV32-PIC-NEXT: addi sp, sp, -16 -; RV32-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-PIC-NEXT: .Lpcrel_hi1: -; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld) -; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi1) -; RV32-PIC-NEXT: call __tls_get_addr@plt -; RV32-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-PIC-NEXT: addi sp, sp, 16 +; RV32-PIC-NEXT: .Ltlsdesc_hi1: +; RV32-PIC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV32-PIC-NEXT: lw a1, %tlsdesc_lo(.Ltlsdesc_hi1)(a0) +; RV32-PIC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi1) +; RV32-PIC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi1) +; RV32-PIC-NEXT: add a0, a0, tp +; 
RV32-PIC-NEXT: lui a1, %tprel_hi(ld) +; RV32-PIC-NEXT: add a0, a1, a0 +; RV32-PIC-NEXT: addi a0, a0, %tprel_lo(ld) ; RV32-PIC-NEXT: ret ; ; RV64-PIC-LABEL: f2: ; RV64-PIC: # %bb.0: # %entry -; RV64-PIC-NEXT: addi sp, sp, -16 -; RV64-PIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-PIC-NEXT: .Lpcrel_hi1: -; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld) -; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi1) -; RV64-PIC-NEXT: call __tls_get_addr@plt -; RV64-PIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-PIC-NEXT: addi sp, sp, 16 +; RV64-PIC-NEXT: .Ltlsdesc_hi1: +; RV64-PIC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV64-PIC-NEXT: ld a1, %tlsdesc_lo(.Ltlsdesc_hi1)(a0) +; RV64-PIC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi1) +; RV64-PIC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi1) +; RV64-PIC-NEXT: add a0, a0, tp +; RV64-PIC-NEXT: lui a1, %tprel_hi(ld) +; RV64-PIC-NEXT: add a0, a1, a0 +; RV64-PIC-NEXT: addi a0, a0, %tprel_lo(ld) ; RV64-PIC-NEXT: ret ; ; RV32-NOPIC-LABEL: f2: @@ -149,7 +137,6 @@ ; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi1)(a0) ; RV64-NOPIC-NEXT: add a0, a0, tp ; RV64-NOPIC-NEXT: ret -; ; RV32-PIC-TLSDESC-LABEL: f2: ; RV32-PIC-TLSDESC: # %bb.0: # %entry ; RV32-PIC-TLSDESC-NEXT: .Ltlsdesc_hi1: @@ -162,7 +149,6 @@ ; RV32-PIC-TLSDESC-NEXT: add a0, a1, a0 ; RV32-PIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(ld) ; RV32-PIC-TLSDESC-NEXT: ret -; ; RV64-PIC-TLSDESC-LABEL: f2: ; RV64-PIC-TLSDESC: # %bb.0: # %entry ; RV64-PIC-TLSDESC-NEXT: .Ltlsdesc_hi1: @@ -175,7 +161,6 @@ ; RV64-PIC-TLSDESC-NEXT: add a0, a1, a0 ; RV64-PIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(ld) ; RV64-PIC-TLSDESC-NEXT: ret -; ; RV32-NOPIC-TLSDESC-LABEL: f2: ; RV32-NOPIC-TLSDESC: # %bb.0: # %entry ; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi1: @@ -183,7 +168,6 @@ ; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi1)(a0) ; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp ; RV32-NOPIC-TLSDESC-NEXT: ret -; ; RV64-NOPIC-TLSDESC-LABEL: f2: ; RV64-NOPIC-TLSDESC: # %bb.0: # %entry ; 
RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi1: @@ -201,17 +185,17 @@ define ptr @f3() nounwind { ; RV32-PIC-LABEL: f3: ; RV32-PIC: # %bb.0: # %entry -; RV32-PIC-NEXT: .Lpcrel_hi2: +; RV32-PIC-NEXT: .Lpcrel_hi0: ; RV32-PIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) -; RV32-PIC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi2)(a0) +; RV32-PIC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi0)(a0) ; RV32-PIC-NEXT: add a0, a0, tp ; RV32-PIC-NEXT: ret ; ; RV64-PIC-LABEL: f3: ; RV64-PIC: # %bb.0: # %entry -; RV64-PIC-NEXT: .Lpcrel_hi2: +; RV64-PIC-NEXT: .Lpcrel_hi0: ; RV64-PIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie) -; RV64-PIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi2)(a0) +; RV64-PIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0) ; RV64-PIC-NEXT: add a0, a0, tp ; RV64-PIC-NEXT: ret ; @@ -230,7 +214,6 @@ ; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi2)(a0) ; RV64-NOPIC-NEXT: add a0, a0, tp ; RV64-NOPIC-NEXT: ret -; ; RV32-PIC-TLSDESC-LABEL: f3: ; RV32-PIC-TLSDESC: # %bb.0: # %entry ; RV32-PIC-TLSDESC-NEXT: .Lpcrel_hi0: @@ -238,7 +221,6 @@ ; RV32-PIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi0)(a0) ; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp ; RV32-PIC-TLSDESC-NEXT: ret -; ; RV64-PIC-TLSDESC-LABEL: f3: ; RV64-PIC-TLSDESC: # %bb.0: # %entry ; RV64-PIC-TLSDESC-NEXT: .Lpcrel_hi0: @@ -246,7 +228,6 @@ ; RV64-PIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0) ; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp ; RV64-PIC-TLSDESC-NEXT: ret -; ; RV32-NOPIC-TLSDESC-LABEL: f3: ; RV32-NOPIC-TLSDESC: # %bb.0: # %entry ; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi2: @@ -254,7 +235,6 @@ ; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi2)(a0) ; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp ; RV32-NOPIC-TLSDESC-NEXT: ret -; ; RV64-NOPIC-TLSDESC-LABEL: f3: ; RV64-NOPIC-TLSDESC: # %bb.0: # %entry ; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi2: @@ -297,28 +277,24 @@ ; RV64-NOPIC-NEXT: add a0, a0, tp, %tprel_add(le) ; RV64-NOPIC-NEXT: addi a0, a0, %tprel_lo(le) ; RV64-NOPIC-NEXT: ret -; ; RV32-PIC-TLSDESC-LABEL: f4: ; RV32-PIC-TLSDESC: # %bb.0: # %entry ; RV32-PIC-TLSDESC-NEXT: lui 
a0, %tprel_hi(le) ; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le) ; RV32-PIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le) ; RV32-PIC-TLSDESC-NEXT: ret -; ; RV64-PIC-TLSDESC-LABEL: f4: ; RV64-PIC-TLSDESC: # %bb.0: # %entry ; RV64-PIC-TLSDESC-NEXT: lui a0, %tprel_hi(le) ; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le) ; RV64-PIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le) ; RV64-PIC-TLSDESC-NEXT: ret -; ; RV32-NOPIC-TLSDESC-LABEL: f4: ; RV32-NOPIC-TLSDESC: # %bb.0: # %entry ; RV32-NOPIC-TLSDESC-NEXT: lui a0, %tprel_hi(le) ; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le) ; RV32-NOPIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le) ; RV32-NOPIC-TLSDESC-NEXT: ret -; ; RV64-NOPIC-TLSDESC-LABEL: f4: ; RV64-NOPIC-TLSDESC: # %bb.0: # %entry ; RV64-NOPIC-TLSDESC-NEXT: lui a0, %tprel_hi(le) @@ -328,3 +304,201 @@ entry: ret ptr @le } + +; localdynamic specified + +define i32 @dedup_localdynamic() nounwind { +; RV32-PIC-LABEL: dedup_localdynamic: +; RV32-PIC: # %bb.0: # %entry +; RV32-PIC-NEXT: .Ltlsdesc_hi2: +; RV32-PIC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV32-PIC-NEXT: lw a1, %tlsdesc_lo(.Ltlsdesc_hi2)(a0) +; RV32-PIC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi2) +; RV32-PIC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi2) +; RV32-PIC-NEXT: add a0, a0, tp +; RV32-PIC-NEXT: lui a1, %tprel_hi(ld) +; RV32-PIC-NEXT: add a1, a1, a0 +; RV32-PIC-NEXT: lw a1, %tprel_lo(ld)(a1) +; RV32-PIC-NEXT: bnez a1, .LBB4_2 +; RV32-PIC-NEXT: # %bb.1: +; RV32-PIC-NEXT: lui a1, %tprel_hi(ld2) +; RV32-PIC-NEXT: add a1, a1, a0 +; RV32-PIC-NEXT: lw a1, %tprel_lo(ld2)(a1) +; RV32-PIC-NEXT: .LBB4_2: +; RV32-PIC-NEXT: lui a2, %tprel_hi(ld3) +; RV32-PIC-NEXT: add a0, a2, a0 +; RV32-PIC-NEXT: lw a0, %tprel_lo(ld3)(a0) +; RV32-PIC-NEXT: add a0, a1, a0 +; RV32-PIC-NEXT: ret +; +; RV64-PIC-LABEL: dedup_localdynamic: +; RV64-PIC: # %bb.0: # %entry +; RV64-PIC-NEXT: .Ltlsdesc_hi2: +; RV64-PIC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV64-PIC-NEXT: ld a1, 
%tlsdesc_lo(.Ltlsdesc_hi2)(a0) +; RV64-PIC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi2) +; RV64-PIC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi2) +; RV64-PIC-NEXT: add a0, a0, tp +; RV64-PIC-NEXT: lui a1, %tprel_hi(ld) +; RV64-PIC-NEXT: add a1, a1, a0 +; RV64-PIC-NEXT: lw a1, %tprel_lo(ld)(a1) +; RV64-PIC-NEXT: bnez a1, .LBB4_2 +; RV64-PIC-NEXT: # %bb.1: +; RV64-PIC-NEXT: lui a1, %tprel_hi(ld2) +; RV64-PIC-NEXT: add a1, a1, a0 +; RV64-PIC-NEXT: lw a1, %tprel_lo(ld2)(a1) +; RV64-PIC-NEXT: .LBB4_2: +; RV64-PIC-NEXT: lui a2, %tprel_hi(ld3) +; RV64-PIC-NEXT: add a0, a2, a0 +; RV64-PIC-NEXT: lw a0, %tprel_lo(ld3)(a0) +; RV64-PIC-NEXT: addw a0, a1, a0 +; RV64-PIC-NEXT: ret +; +; RV32-NOPIC-LABEL: dedup_localdynamic: +; RV32-NOPIC: # %bb.0: # %entry +; RV32-NOPIC-NEXT: .Lpcrel_hi3: +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-NEXT: bnez a0, .LBB4_2 +; RV32-NOPIC-NEXT: # %bb.1: +; RV32-NOPIC-NEXT: .Lpcrel_hi4: +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-NEXT: .LBB4_2: +; RV32-NOPIC-NEXT: .Lpcrel_hi5: +; RV32-NOPIC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV32-NOPIC-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV32-NOPIC-NEXT: add a1, a1, tp +; RV32-NOPIC-NEXT: lw a1, 0(a1) +; RV32-NOPIC-NEXT: add a0, a0, a1 +; RV32-NOPIC-NEXT: ret +; +; RV64-NOPIC-LABEL: dedup_localdynamic: +; RV64-NOPIC: # %bb.0: # %entry +; RV64-NOPIC-NEXT: .Lpcrel_hi3: +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp +; RV64-NOPIC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-NEXT: bnez a0, .LBB4_2 +; RV64-NOPIC-NEXT: # %bb.1: +; RV64-NOPIC-NEXT: .Lpcrel_hi4: +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV64-NOPIC-NEXT: ld a0, 
%pcrel_lo(.Lpcrel_hi4)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp +; RV64-NOPIC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-NEXT: .LBB4_2: +; RV64-NOPIC-NEXT: .Lpcrel_hi5: +; RV64-NOPIC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV64-NOPIC-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV64-NOPIC-NEXT: add a1, a1, tp +; RV64-NOPIC-NEXT: lw a1, 0(a1) +; RV64-NOPIC-NEXT: addw a0, a0, a1 +; RV64-NOPIC-NEXT: ret +; RV32-PIC-TLSDESC-LABEL: dedup_localdynamic: +; RV32-PIC-TLSDESC: # %bb.0: # %entry +; RV32-PIC-TLSDESC-NEXT: .Ltlsdesc_hi2: +; RV32-PIC-TLSDESC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV32-PIC-TLSDESC-NEXT: lw a1, %tlsdesc_lo(.Ltlsdesc_hi2)(a0) +; RV32-PIC-TLSDESC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi2) +; RV32-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi2) +; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp +; RV32-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld) +; RV32-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV32-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld)(a1) +; RV32-PIC-TLSDESC-NEXT: bnez a1, .LBB4_2 +; RV32-PIC-TLSDESC-NEXT: # %bb.1: +; RV32-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld2) +; RV32-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV32-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld2)(a1) +; RV32-PIC-TLSDESC-NEXT: .LBB4_2: +; RV32-PIC-TLSDESC-NEXT: lui a2, %tprel_hi(ld3) +; RV32-PIC-TLSDESC-NEXT: add a0, a2, a0 +; RV32-PIC-TLSDESC-NEXT: lw a0, %tprel_lo(ld3)(a0) +; RV32-PIC-TLSDESC-NEXT: add a0, a1, a0 +; RV32-PIC-TLSDESC-NEXT: ret +; RV64-PIC-TLSDESC-LABEL: dedup_localdynamic: +; RV64-PIC-TLSDESC: # %bb.0: # %entry +; RV64-PIC-TLSDESC-NEXT: .Ltlsdesc_hi2: +; RV64-PIC-TLSDESC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV64-PIC-TLSDESC-NEXT: ld a1, %tlsdesc_lo(.Ltlsdesc_hi2)(a0) +; RV64-PIC-TLSDESC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi2) +; RV64-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi2) +; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp +; RV64-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld) +; RV64-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV64-PIC-TLSDESC-NEXT: lw 
a1, %tprel_lo(ld)(a1) +; RV64-PIC-TLSDESC-NEXT: bnez a1, .LBB4_2 +; RV64-PIC-TLSDESC-NEXT: # %bb.1: +; RV64-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld2) +; RV64-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV64-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld2)(a1) +; RV64-PIC-TLSDESC-NEXT: .LBB4_2: +; RV64-PIC-TLSDESC-NEXT: lui a2, %tprel_hi(ld3) +; RV64-PIC-TLSDESC-NEXT: add a0, a2, a0 +; RV64-PIC-TLSDESC-NEXT: lw a0, %tprel_lo(ld3)(a0) +; RV64-PIC-TLSDESC-NEXT: addw a0, a1, a0 +; RV64-PIC-TLSDESC-NEXT: ret +; RV32-NOPIC-TLSDESC-LABEL: dedup_localdynamic: +; RV32-NOPIC-TLSDESC: # %bb.0: # %entry +; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi3: +; RV32-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV32-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-TLSDESC-NEXT: bnez a0, .LBB4_2 +; RV32-NOPIC-TLSDESC-NEXT: # %bb.1: +; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi4: +; RV32-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV32-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-TLSDESC-NEXT: .LBB4_2: +; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi5: +; RV32-NOPIC-TLSDESC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV32-NOPIC-TLSDESC-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV32-NOPIC-TLSDESC-NEXT: add a1, a1, tp +; RV32-NOPIC-TLSDESC-NEXT: lw a1, 0(a1) +; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, a1 +; RV32-NOPIC-TLSDESC-NEXT: ret +; RV64-NOPIC-TLSDESC-LABEL: dedup_localdynamic: +; RV64-NOPIC-TLSDESC: # %bb.0: # %entry +; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi3: +; RV64-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV64-NOPIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV64-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-TLSDESC-NEXT: bnez a0, .LBB4_2 +; RV64-NOPIC-TLSDESC-NEXT: # %bb.1: +; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi4: +; 
RV64-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV64-NOPIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV64-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-TLSDESC-NEXT: .LBB4_2: +; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi5: +; RV64-NOPIC-TLSDESC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV64-NOPIC-TLSDESC-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV64-NOPIC-TLSDESC-NEXT: add a1, a1, tp +; RV64-NOPIC-TLSDESC-NEXT: lw a1, 0(a1) +; RV64-NOPIC-TLSDESC-NEXT: addw a0, a0, a1 +; RV64-NOPIC-TLSDESC-NEXT: ret +entry: + %0 = load i32, ptr @ld, align 4 + %1 = icmp eq i32 %0, 0 + br i1 %1, label %2, label %4 + +2: + %3 = load i32, ptr @ld2, align 4 + br label %4 + +4: + %5 = phi i32 [ %3, %2 ], [ %0, %entry ] + %6 = load i32, ptr @ld3, align 4 + %7 = add nsw i32 %5, %6 + ret i32 %7 +}