diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_target(RISCVCodeGen RISCVAsmPrinter.cpp RISCVCodeGenPrepare.cpp + RISCVCleanupLocalDynamicTLSPass.cpp RISCVMakeCompressible.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -76,6 +76,9 @@ RISCVSubtarget &, RISCVRegisterBankInfo &); void initializeRISCVDAGToDAGISelPass(PassRegistry &); + +void initializeRISCVLDTLSCleanupPass(PassRegistry &); +FunctionPass *createRISCVCleanupLocalDynamicTLSPass(); } // namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVCleanupLocalDynamicTLSPass.cpp b/llvm/lib/Target/RISCV/RISCVCleanupLocalDynamicTLSPass.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVCleanupLocalDynamicTLSPass.cpp @@ -0,0 +1,150 @@ +//===-- RISCVCleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Local-dynamic access to thread-local variables proceeds in three stages. +// +// 1. The offset of this Module's thread-local area from tp is calculated +// in much the same way as a general-dynamic TLS-descriptor access against +// the special symbol _TLS_MODULE_BASE_. +// 2. The variable's offset from _TLS_MODULE_BASE_ is calculated. +// 3. These two are added, together with tp, to obtain the variable's +// true address.
+//
+// This is only better than general-dynamic access to the variable if two or
+// more of the first stage TLS-descriptor calculations can be combined. This
+// pass looks through a function and performs such combinations.
+//
+//===----------------------------------------------------------------------===//
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
+#define TLSCLEANUP_PASS_NAME "RISCV Local Dynamic TLS Access Clean-up"
+
+namespace {
+struct RISCVLDTLSCleanup : public MachineFunctionPass {
+  static char ID;
+  RISCVLDTLSCleanup() : MachineFunctionPass(ID) {
+    initializeRISCVLDTLSCleanupPass(*PassRegistry::getPassRegistry());
+  }
+
+  // Entry point: fold redundant _TLS_MODULE_BASE_ descriptor computations in
+  // MF. Returns true if any instruction was inserted or erased.
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
+
+    // No point folding accesses if there aren't at least two.
+    auto *MFI = MF.getInfo<RISCVMachineFunctionInfo>();
+    if (MFI->getNumLocalDynamicTLSAccesses() < 2)
+      return false;
+
+    // Walking the dominator tree guarantees the first computation found
+    // dominates every later one that reuses its result.
+    auto *DT = &getAnalysis<MachineDominatorTree>();
+    return VisitNode(DT->getRootNode(), 0);
+  }
+
+  // Visit the dominator subtree rooted at Node in pre-order. If TLSBaseAddrReg
+  // is non-zero, use it to replace any TLS_base_addr instructions. Otherwise,
+  // create the register when the first such instruction is seen, and then use
+  // it as we encounter more. It is passed by value so that a register created
+  // in one subtree is never reused in a sibling subtree it does not dominate.
+  // Returns true if any instruction was rewritten.
+  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
+    MachineBasicBlock *BB = Node->getBlock();
+    bool Changed = false;
+
+    // Traverse the current block.
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+         ++I) {
+      if (I->getOpcode() != RISCV::PseudoLA_TLSDESC)
+        continue;
+
+      // Make sure it's a local dynamic access.
+      const MachineOperand &Sym = I->getOperand(1);
+      if (!Sym.isSymbol() ||
+          StringRef(Sym.getSymbolName()) != "_TLS_MODULE_BASE_")
+        continue;
+
+      // Both helpers return the COPY they inserted; resuming from it keeps the
+      // iterator valid even when the original instruction has been erased.
+      if (TLSBaseAddrReg)
+        I = replaceTLSBaseAddrCall(*I, TLSBaseAddrReg);
+      else
+        I = setRegister(*I, &TLSBaseAddrReg);
+      Changed = true;
+    }
+
+    // Visit the children of this block in the dominator tree.
+    for (MachineDomTreeNode *N : *Node)
+      Changed |= VisitNode(N, TLSBaseAddrReg);
+
+    return Changed;
+  }
+
+  // Replace the TLS_base_addr instruction I with a copy from
+  // TLSBaseAddrReg, returning the new instruction.
+  MachineInstr *replaceTLSBaseAddrCall(MachineInstr &I,
+                                       unsigned TLSBaseAddrReg) {
+    MachineFunction *MF = I.getMF();
+    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+    // Insert a COPY from TLSBaseAddrReg into the register I defined, which is
+    // where the rest of the code sequence expects the address to be.
+    MachineInstr *Copy =
+        BuildMI(*I.getParent(), I, I.getDebugLoc(),
+                TII->get(TargetOpcode::COPY), I.getOperand(0).getReg())
+            .addReg(TLSBaseAddrReg);
+
+    // Update the call site info.
+    if (I.shouldUpdateCallSiteInfo())
+      MF->eraseCallSiteInfo(&I);
+
+    // Erase the TLS_base_addr instruction.
+    I.eraseFromParent();
+
+    return Copy;
+  }
+
+  // Create a virtual register in *TLSBaseAddrReg, and populate it by
+  // inserting a copy instruction after I. Returns the new instruction.
+  MachineInstr *setRegister(MachineInstr &I, unsigned *TLSBaseAddrReg) {
+    MachineFunction *MF = I.getMF();
+    const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+
+    // Create a virtual register for the TLS base address.
+    MachineRegisterInfo &RegInfo = MF->getRegInfo();
+    *TLSBaseAddrReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
+    return BuildMI(*I.getParent(), ++I.getIterator(), I.getDebugLoc(),
+                   TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
+        .addReg(I.getOperand(0).getReg());
+  }
+
+  StringRef getPassName() const override { return TLSCLEANUP_PASS_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addRequired<MachineDominatorTree>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+} // namespace
+
+INITIALIZE_PASS(RISCVLDTLSCleanup, "riscv-local-dynamic-tls-cleanup",
+                TLSCLEANUP_PASS_NAME, false, false)
+
+char RISCVLDTLSCleanup::ID = 0;
+FunctionPass *llvm::createRISCVCleanupLocalDynamicTLSPass() {
+  return new RISCVLDTLSCleanup();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -312,6 +312,11 @@ bool RISCVPassConfig::addInstSelector() { addPass(createRISCVISelDag(getRISCVTargetMachine(), getOptLevel())); + // For ELF, cleanup any local-dynamic TLS accesses.
+ if (EnableRISCVTLSDESC && TM->getTargetTriple().isOSBinFormatELF() && + getOptLevel() != CodeGenOpt::None) + addPass(createRISCVCleanupLocalDynamicTLSPass()); + return false; } diff --git a/llvm/test/CodeGen/RISCV/tls-models.ll b/llvm/test/CodeGen/RISCV/tls-models.ll --- a/llvm/test/CodeGen/RISCV/tls-models.ll +++ b/llvm/test/CodeGen/RISCV/tls-models.ll @@ -20,6 +20,8 @@ @unspecified = external thread_local global i32 @ld = external thread_local(localdynamic) global i32 +@ld2 = external thread_local(localdynamic) global i32 +@ld3 = external thread_local(localdynamic) global i32 @ie = external thread_local(initialexec) global i32 @le = external thread_local(localexec) global i32 @@ -328,3 +330,217 @@ entry: ret ptr @le } + +; localdynamic specified; several accesses in one function should share a single _TLS_MODULE_BASE_ TLSDESC sequence + +define i32 @dedup_localdynamic() nounwind { +; RV32-PIC-LABEL: dedup_localdynamic: +; RV32-PIC: # %bb.0: # %entry +; RV32-PIC-NEXT: addi sp, sp, -16 +; RV32-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-PIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-PIC-NEXT: .Lpcrel_hi3: +; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld) +; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi3) +; RV32-PIC-NEXT: call __tls_get_addr@plt +; RV32-PIC-NEXT: lw s0, 0(a0) +; RV32-PIC-NEXT: bnez s0, .LBB4_2 +; RV32-PIC-NEXT: # %bb.1: +; RV32-PIC-NEXT: .Lpcrel_hi4: +; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld2) +; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi4) +; RV32-PIC-NEXT: call __tls_get_addr@plt +; RV32-PIC-NEXT: lw s0, 0(a0) +; RV32-PIC-NEXT: .LBB4_2: +; RV32-PIC-NEXT: .Lpcrel_hi5: +; RV32-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld3) +; RV32-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi5) +; RV32-PIC-NEXT: call __tls_get_addr@plt +; RV32-PIC-NEXT: lw a0, 0(a0) +; RV32-PIC-NEXT: add a0, s0, a0 +; RV32-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-PIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-PIC-NEXT: addi sp, sp, 16 +; RV32-PIC-NEXT: ret +; +; RV64-PIC-LABEL: dedup_localdynamic: +; RV64-PIC: # %bb.0: # 
%entry +; RV64-PIC-NEXT: addi sp, sp, -16 +; RV64-PIC-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-PIC-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-PIC-NEXT: .Lpcrel_hi3: +; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld) +; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi3) +; RV64-PIC-NEXT: call __tls_get_addr@plt +; RV64-PIC-NEXT: lw s0, 0(a0) +; RV64-PIC-NEXT: bnez s0, .LBB4_2 +; RV64-PIC-NEXT: # %bb.1: +; RV64-PIC-NEXT: .Lpcrel_hi4: +; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld2) +; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi4) +; RV64-PIC-NEXT: call __tls_get_addr@plt +; RV64-PIC-NEXT: lw s0, 0(a0) +; RV64-PIC-NEXT: .LBB4_2: +; RV64-PIC-NEXT: .Lpcrel_hi5: +; RV64-PIC-NEXT: auipc a0, %tls_gd_pcrel_hi(ld3) +; RV64-PIC-NEXT: addi a0, a0, %pcrel_lo(.Lpcrel_hi5) +; RV64-PIC-NEXT: call __tls_get_addr@plt +; RV64-PIC-NEXT: lw a0, 0(a0) +; RV64-PIC-NEXT: addw a0, s0, a0 +; RV64-PIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-PIC-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64-PIC-NEXT: addi sp, sp, 16 +; RV64-PIC-NEXT: ret +; +; RV32-NOPIC-LABEL: dedup_localdynamic: +; RV32-NOPIC: # %bb.0: # %entry +; RV32-NOPIC-NEXT: .Lpcrel_hi3: +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-NEXT: bnez a0, .LBB4_2 +; RV32-NOPIC-NEXT: # %bb.1: +; RV32-NOPIC-NEXT: .Lpcrel_hi4: +; RV32-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV32-NOPIC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV32-NOPIC-NEXT: add a0, a0, tp +; RV32-NOPIC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-NEXT: .LBB4_2: +; RV32-NOPIC-NEXT: .Lpcrel_hi5: +; RV32-NOPIC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV32-NOPIC-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV32-NOPIC-NEXT: add a1, a1, tp +; RV32-NOPIC-NEXT: lw a1, 0(a1) +; RV32-NOPIC-NEXT: add a0, a0, a1 +; RV32-NOPIC-NEXT: ret +; +; RV64-NOPIC-LABEL: dedup_localdynamic: +; RV64-NOPIC: # %bb.0: # %entry +; 
RV64-NOPIC-NEXT: .Lpcrel_hi3: +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp +; RV64-NOPIC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-NEXT: bnez a0, .LBB4_2 +; RV64-NOPIC-NEXT: # %bb.1: +; RV64-NOPIC-NEXT: .Lpcrel_hi4: +; RV64-NOPIC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64-NOPIC-NEXT: add a0, a0, tp +; RV64-NOPIC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-NEXT: .LBB4_2: +; RV64-NOPIC-NEXT: .Lpcrel_hi5: +; RV64-NOPIC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV64-NOPIC-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV64-NOPIC-NEXT: add a1, a1, tp +; RV64-NOPIC-NEXT: lw a1, 0(a1) +; RV64-NOPIC-NEXT: addw a0, a0, a1 +; RV64-NOPIC-NEXT: ret +; +; RV32-PIC-TLSDESC-LABEL: dedup_localdynamic: +; RV32-PIC-TLSDESC: # %bb.0: # %entry +; RV32-PIC-TLSDESC-NEXT: .Ltlsdesc_hi2: +; RV32-PIC-TLSDESC-NEXT: auipc a0, %tlsdesc_hi(_TLS_MODULE_BASE_) +; RV32-PIC-TLSDESC-NEXT: lw a1, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a0) +; RV32-PIC-TLSDESC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi2) +; RV32-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi2) +; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp +; RV32-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld) +; RV32-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV32-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld)(a1) +; RV32-PIC-TLSDESC-NEXT: bnez a1, .LBB4_2 +; RV32-PIC-TLSDESC-NEXT: # %bb.1: +; RV32-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld2) +; RV32-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV32-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld2)(a1) +; RV32-PIC-TLSDESC-NEXT: .LBB4_2: +; RV32-PIC-TLSDESC-NEXT: lui a2, %tprel_hi(ld3) +; RV32-PIC-TLSDESC-NEXT: add a0, a2, a0 +; RV32-PIC-TLSDESC-NEXT: lw a0, %tprel_lo(ld3)(a0) +; RV32-PIC-TLSDESC-NEXT: add a0, a1, a0 +; RV32-PIC-TLSDESC-NEXT: ret +; +; RV64-PIC-TLSDESC-LABEL: dedup_localdynamic: +; RV64-PIC-TLSDESC: # %bb.0: # %entry +; RV64-PIC-TLSDESC-NEXT: .Ltlsdesc_hi2: +; RV64-PIC-TLSDESC-NEXT: auipc a0, 
%tlsdesc_hi(_TLS_MODULE_BASE_) +; RV64-PIC-TLSDESC-NEXT: ld a1, %tlsdesc_load_lo(.Ltlsdesc_hi2)(a0) +; RV64-PIC-TLSDESC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi2) +; RV64-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi2) +; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp +; RV64-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld) +; RV64-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV64-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld)(a1) +; RV64-PIC-TLSDESC-NEXT: bnez a1, .LBB4_2 +; RV64-PIC-TLSDESC-NEXT: # %bb.1: +; RV64-PIC-TLSDESC-NEXT: lui a1, %tprel_hi(ld2) +; RV64-PIC-TLSDESC-NEXT: add a1, a1, a0 +; RV64-PIC-TLSDESC-NEXT: lw a1, %tprel_lo(ld2)(a1) +; RV64-PIC-TLSDESC-NEXT: .LBB4_2: +; RV64-PIC-TLSDESC-NEXT: lui a2, %tprel_hi(ld3) +; RV64-PIC-TLSDESC-NEXT: add a0, a2, a0 +; RV64-PIC-TLSDESC-NEXT: lw a0, %tprel_lo(ld3)(a0) +; RV64-PIC-TLSDESC-NEXT: addw a0, a1, a0 +; RV64-PIC-TLSDESC-NEXT: ret +; +; RV32-NOPIC-TLSDESC-LABEL: dedup_localdynamic: +; RV32-NOPIC-TLSDESC: # %bb.0: # %entry +; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi3: +; RV32-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV32-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-TLSDESC-NEXT: bnez a0, .LBB4_2 +; RV32-NOPIC-TLSDESC-NEXT: # %bb.1: +; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi4: +; RV32-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV32-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV32-NOPIC-TLSDESC-NEXT: .LBB4_2: +; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi5: +; RV32-NOPIC-TLSDESC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV32-NOPIC-TLSDESC-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV32-NOPIC-TLSDESC-NEXT: add a1, a1, tp +; RV32-NOPIC-TLSDESC-NEXT: lw a1, 0(a1) +; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, a1 +; RV32-NOPIC-TLSDESC-NEXT: ret +; +; RV64-NOPIC-TLSDESC-LABEL: dedup_localdynamic: +; RV64-NOPIC-TLSDESC: # 
%bb.0: # %entry +; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi3: +; RV64-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld) +; RV64-NOPIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi3)(a0) +; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV64-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-TLSDESC-NEXT: bnez a0, .LBB4_2 +; RV64-NOPIC-TLSDESC-NEXT: # %bb.1: +; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi4: +; RV64-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ld2) +; RV64-NOPIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi4)(a0) +; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp +; RV64-NOPIC-TLSDESC-NEXT: lw a0, 0(a0) +; RV64-NOPIC-TLSDESC-NEXT: .LBB4_2: +; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi5: +; RV64-NOPIC-TLSDESC-NEXT: auipc a1, %tls_ie_pcrel_hi(ld3) +; RV64-NOPIC-TLSDESC-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi5)(a1) +; RV64-NOPIC-TLSDESC-NEXT: add a1, a1, tp +; RV64-NOPIC-TLSDESC-NEXT: lw a1, 0(a1) +; RV64-NOPIC-TLSDESC-NEXT: addw a0, a0, a1 +; RV64-NOPIC-TLSDESC-NEXT: ret +entry: + %0 = load i32, ptr @ld, align 4 + %1 = icmp eq i32 %0, 0 + br i1 %1, label %2, label %4 + +2: + %3 = load i32, ptr @ld2, align 4 + br label %4 + +4: + %5 = phi i32 [ %3, %2 ], [ %0, %entry ] + %6 = load i32, ptr @ld3, align 4 + %7 = add nsw i32 %5, %6 + ret i32 %7 +}