Index: llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
===================================================================
--- llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
+++ llvm/lib/Target/X86/X86SpeculativeLoadHardening.cpp
@@ -179,6 +179,9 @@
 
   void unfoldCallAndJumpLoads(MachineFunction &MF);
 
+  SmallVector<MachineInstr *, 16>
+  tracePredStateThroughIndirectBranches(MachineFunction &MF);
+
   void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
 
   unsigned saveEFLAGS(MachineBasicBlock &MBB,
@@ -522,11 +525,16 @@
     }
   }
 
-  // If we are going to harden calls and jumps we need to unfold their memory
-  // operands.
-  if (HardenIndirectCallsAndJumps)
+  if (HardenIndirectCallsAndJumps) {
+    // If we are going to harden calls and jumps we need to unfold their memory
+    // operands.
     unfoldCallAndJumpLoads(MF);
 
+    // Then we trace predicate state through the indirect branches.
+    auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
+    CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
+  }
+
   // Now that we have the predicate state available at the start of each block
   // in the CFG, trace it through each block, hardening vulnerable instructions
   // as we go.
@@ -925,6 +933,261 @@
   }
 }
 
+/// Trace the predicate state through indirect branches, instrumenting them to
+/// poison the state if a target is reached that does not match the expected
+/// target.
+///
+/// This is designed to mitigate Spectre variant 1 attacks where an indirect
+/// branch is trained to predict a particular target and then mispredicts that
+/// target in a way that can leak data. Despite using an indirect branch, this
+/// is really a variant 1 style attack: it does not steer execution to an
+/// arbitrary or attacker controlled address, and it does not require any
+/// special code executing next to the victim. This attack can also be mitigated
+/// through retpolines, but those will remove all indirect branches from the
+/// code, naturally disabling this attack. This mitigation can replace
+/// retpoline-style mitigations for jump tables and other indirect branches
+/// within a function when variant 2 isn't a risk. Indirect calls, however,
+/// cannot be mitigated through this technique without changing the ABI in a
+/// fundamental way.
+SmallVector<MachineInstr *, 16>
+X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
+    MachineFunction &MF) {
+  // We use the SSAUpdater to insert PHI nodes for the target addresses of
+  // indirect branches. We don't actually need the full power of the SSA updater
+  // in this particular case as we always have immediately available values, but
+  // this avoids us having to re-implement the PHI construction logic.
+  MachineSSAUpdater TargetAddrSSA(MF);
+  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
+
+  // Track which blocks were terminated with an indirect branch.
+  SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
+
+  // We need to know what blocks end up reached via indirect branches. We
+  // expect this to be a subset of those whose address is taken and so track it
+  // directly via the CFG.
+  SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
+
+  // Walk all the blocks which end in an indirect branch and make the
+  // target address available.
+  for (MachineBasicBlock &MBB : MF) {
+    // Find the last terminator.
+    auto MII = MBB.instr_rbegin();
+    while (MII != MBB.instr_rend() && MII->isDebugInstr())
+      ++MII;
+    if (MII == MBB.instr_rend())
+      continue;
+    MachineInstr &TI = *MII;
+    if (!TI.isTerminator() || !TI.isBranch())
+      // No terminator or non-branch terminator.
+      continue;
+
+    unsigned TargetReg;
+
+    switch (TI.getOpcode()) {
+    default:
+      // Direct branch or conditional branch (leading to fallthrough).
+      continue;
+
+    case X86::FARJMP16m:
+    case X86::FARJMP32m:
+    case X86::FARJMP64:
+      // We cannot mitigate far jumps or calls, but we also don't expect them
+      // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
+      continue;
+
+    case X86::JMP16m:
+    case X86::JMP16m_NT:
+    case X86::JMP32m:
+    case X86::JMP32m_NT:
+    case X86::JMP64m:
+    case X86::JMP64m_NT:
+      // Mostly as documentation.
+      report_fatal_error("Memory operand jumps should have been unfolded!");
+
+    case X86::JMP16r:
+      report_fatal_error(
+          "Support for 16-bit indirect branches is not implemented.");
+    case X86::JMP32r:
+      report_fatal_error(
+          "Support for 32-bit indirect branches is not implemented.");
+
+    case X86::JMP64r:
+      TargetReg = TI.getOperand(0).getReg();
+    }
+
+    // We have definitely found an indirect branch. Verify that there are no
+    // preceding conditional branches as we don't yet support that.
+    if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
+          return !OtherTI.isDebugInstr() && &OtherTI != &TI;
+        })) {
+      LLVM_DEBUG({
+        dbgs() << "ERROR: Found other terminators in a block with an indirect "
+                  "branch! This is not yet supported! Terminator sequence:\n";
+        for (MachineInstr &MI : MBB.terminators()) {
+          MI.dump();
+          dbgs() << '\n';
+        }
+      });
+      report_fatal_error("Unimplemented terminator sequence!");
+    }
+
+    // Make the target register an available value for this block.
+    TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
+    IndirectTerminatedMBBs.insert(&MBB);
+
+    // Add all the successors to our target candidates.
+    for (MachineBasicBlock *Succ : MBB.successors())
+      IndirectTargetMBBs.insert(Succ);
+  }
+
+  // Keep track of the cmov instructions we insert so we can return them.
+  SmallVector<MachineInstr *, 16> CMovs;
+
+  // If we didn't find any indirect branches with targets, nothing to do here.
+  if (IndirectTargetMBBs.empty())
+    return CMovs;
+
+  // We found indirect branches and targets that need to be instrumented to
+  // harden loads within them. Walk the blocks of the function (to get a stable
+  // ordering) and instrument each target of an indirect branch.
+  for (MachineBasicBlock &MBB : MF) {
+    // Skip the blocks that aren't candidate targets.
+    if (!IndirectTargetMBBs.count(&MBB))
+      continue;
+
+    // We don't expect EH pads to ever be reached via an indirect branch. If
+    // this is desired for some reason, we could simply skip them here rather
+    // than asserting.
+    assert(!MBB.isEHPad() &&
+           "Unexpected EH pad as target of an indirect branch!");
+
+    // We should never end up threading EFLAGS into a block to harden
+    // conditional jumps as there would be an additional successor via the
+    // indirect branch. As a consequence, all such edges would be split before
+    // reaching here, and the inserted block will handle the EFLAGS-based
+    // hardening.
+    assert(!MBB.isLiveIn(X86::EFLAGS) &&
+           "Cannot check within a block that already has live-in EFLAGS!");
+
+    // We can't handle having non-indirect edges into this block unless this is
+    // the only successor and we can synthesize the necessary target address.
+    for (MachineBasicBlock *Pred : MBB.predecessors()) {
+      // If we've already handled this by extracting the target directly,
+      // nothing to do.
+      if (IndirectTerminatedMBBs.count(Pred))
+        continue;
+
+      // Otherwise, we have to be the only successor. We generally expect this
+      // to be true as conditional branches should have had a critical edge
+      // split already. We don't however need to worry about EH pad successors
+      // as they'll happily ignore the target and their hardening strategy is
+      // resilient to all ways in which they could be reached speculatively.
+      if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
+            return Succ->isEHPad() || Succ == &MBB;
+          })) {
+        LLVM_DEBUG({
+          dbgs() << "ERROR: Found conditional entry to target of indirect "
+                    "branch!\n";
+          Pred->dump();
+          MBB.dump();
+        });
+        report_fatal_error("Cannot harden a conditional entry to a target of "
+                           "an indirect branch!");
+      }
+
+      // Now we need to compute the address of this block and install it as a
+      // synthetic target in the predecessor. We do this at the bottom of the
+      // predecessor.
+      auto InsertPt = Pred->getFirstTerminator();
+      unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+      if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+          !Subtarget->isPositionIndependent()) {
+        // Directly materialize it into an immediate.
+        auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
+                             TII->get(X86::MOV64ri32), TargetReg)
+                         .addMBB(&MBB);
+        ++NumInstsInserted;
+        (void)AddrI;
+        LLVM_DEBUG(dbgs() << "  Inserting mov: "; AddrI->dump();
+                   dbgs() << "\n");
+      } else {
+        auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
+                             TargetReg)
+                         .addReg(X86::RIP)
+                         .addImm(0)
+                         .addReg(0)
+                         .addMBB(&MBB)
+                         .addReg(0);
+        ++NumInstsInserted;
+        (void)AddrI;
+        LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump();
+                   dbgs() << "\n");
+      }
+      // And make this available.
+      TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
+    }
+
+    // Materialize the needed SSA value of the target. Note that we need the
+    // middle of the block as this block might at the bottom have an indirect
+    // branch back to itself. We can do this here because at this point, every
+    // predecessor of this block has an available value. This is basically just
+    // automating the construction of a PHI node for this target.
+    unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
+
+    // Insert a comparison of the incoming target register with this block's
+    // address.
+    auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
+    if (MF.getTarget().getCodeModel() == CodeModel::Small &&
+        !Subtarget->isPositionIndependent()) {
+      // Check directly against a relocated immediate when we can.
+      auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
+                        .addReg(TargetReg, RegState::Kill)
+                        .addMBB(&MBB);
+      ++NumInstsInserted;
+      (void)CheckI;
+      LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+    } else {
+      // Otherwise compute the address into a register first.
+      unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
+      auto AddrI =
+          BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
+              .addReg(X86::RIP)
+              .addImm(0)
+              .addReg(0)
+              .addMBB(&MBB)
+              .addReg(0);
+      ++NumInstsInserted;
+      (void)AddrI;
+      LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump(); dbgs() << "\n");
+      auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
+                        .addReg(TargetReg, RegState::Kill)
+                        .addReg(AddrReg, RegState::Kill);
+      ++NumInstsInserted;
+      (void)CheckI;
+      LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
+    }
+
+    // Now cmov over the predicate if the comparison wasn't equal.
+    int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
+    auto CMovOp = X86::getCMovFromCond(X86::COND_NE, PredStateSizeInBytes);
+    unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
+    auto CMovI =
+        BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
+            .addReg(PS->InitialReg)
+            .addReg(PS->PoisonReg);
+    CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
+    ++NumInstsInserted;
+    LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
+    CMovs.push_back(&*CMovI);
+
+    // And put the new value into the available values for SSA form of our
+    // predicate state.
+    PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
+  }
+
+  // Return all the newly inserted cmov instructions of the predicate state.
+  return CMovs;
+}
+
 /// Returns true if the instruction has no behavior (specified or otherwise)
 /// that is based on the value of any of its register operands
 ///
Index: llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
===================================================================
--- llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
+++ llvm/test/CodeGen/X86/speculative-load-hardening-indirect.ll
@@ -157,19 +157,27 @@
 ; X64-NEXT:    movq %rsp, %rcx
 ; X64-NEXT:    movq $-1, %rax
 ; X64-NEXT:    sarq $63, %rcx
-; X64-NEXT:    movq (%rdi), %rax
-; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    jmpq *%rax
+; X64-NEXT:    movq (%rdi), %rdx
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    jmpq *%rdx
 ; X64-NEXT:  .LBB4_1: # %bb0
+; X64-NEXT:    cmpq $.LBB4_1, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $2, %eax
 ; X64-NEXT:    jmp .LBB4_2
 ; X64-NEXT:  .LBB4_4: # %bb2
+; X64-NEXT:    cmpq $.LBB4_4, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $13, %eax
 ; X64-NEXT:    jmp .LBB4_2
 ; X64-NEXT:  .LBB4_5: # %bb3
+; X64-NEXT:    cmpq $.LBB4_5, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $42, %eax
 ; X64-NEXT:    jmp .LBB4_2
 ; X64-NEXT:  .LBB4_3: # %bb1
+; X64-NEXT:    cmpq $.LBB4_3, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $7, %eax
 ; X64-NEXT:  .LBB4_2: # %bb0
 ; X64-NEXT:    shlq $47, %rcx
@@ -201,24 +209,32 @@
 ; X64-NEXT:    movq %rsp, %rcx
 ; X64-NEXT:    movq $-1, %rax
 ; X64-NEXT:    sarq $63, %rcx
-; X64-NEXT:    movslq %edi, %rax
-; X64-NEXT:    movq global_blockaddrs(,%rax,8), %rax
-; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    jmpq *%rax
+; X64-NEXT:    movslq %edi, %rdx
+; X64-NEXT:    movq global_blockaddrs(,%rdx,8), %rdx
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    jmpq *%rdx
 ; X64-NEXT:  .Ltmp0: # Block address taken
 ; X64-NEXT:  .LBB5_1: # %bb0
+; X64-NEXT:    cmpq $.LBB5_1, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $2, %eax
 ; X64-NEXT:    jmp .LBB5_2
 ; X64-NEXT:  .Ltmp1: # Block address taken
 ; X64-NEXT:  .LBB5_4: # %bb2
+; X64-NEXT:    cmpq $.LBB5_4, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $13, %eax
 ; X64-NEXT:    jmp .LBB5_2
 ; X64-NEXT:  .Ltmp2: # Block address taken
 ; X64-NEXT:  .LBB5_5: # %bb3
+; X64-NEXT:    cmpq $.LBB5_5, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $42, %eax
 ; X64-NEXT:    jmp .LBB5_2
 ; X64-NEXT:  .Ltmp3: # Block address taken
 ; X64-NEXT:  .LBB5_3: # %bb1
+; X64-NEXT:    cmpq $.LBB5_3, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $7, %eax
 ; X64-NEXT:  .LBB5_2: # %bb0
 ; X64-NEXT:    shlq $47, %rcx
@@ -296,26 +312,34 @@
 ; X64-NEXT:    ja .LBB6_2
 ; X64-NEXT:  # %bb.1: # %entry
 ; X64-NEXT:    cmovaq %rax, %rcx
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    movq .LJTI6_0(,%rax,8), %rax
-; X64-NEXT:    orq %rcx, %rax
-; X64-NEXT:    jmpq *%rax
-; X64-NEXT:  .LBB6_3: # %bb1
+; X64-NEXT:    movl %edi, %edx
+; X64-NEXT:    movq .LJTI6_0(,%rdx,8), %rdx
+; X64-NEXT:    orq %rcx, %rdx
+; X64-NEXT:    jmpq *%rdx
+; X64-NEXT:  .LBB6_4: # %bb1
+; X64-NEXT:    cmpq $.LBB6_4, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $7, %eax
-; X64-NEXT:    jmp .LBB6_4
+; X64-NEXT:    jmp .LBB6_3
 ; X64-NEXT:  .LBB6_2: # %bb0
 ; X64-NEXT:    cmovbeq %rax, %rcx
 ; X64-NEXT:    movl $2, %eax
-; X64-NEXT:    jmp .LBB6_4
+; X64-NEXT:    jmp .LBB6_3
 ; X64-NEXT:  .LBB6_5: # %bb2
+; X64-NEXT:    cmpq $.LBB6_5, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $13, %eax
-; X64-NEXT:    jmp .LBB6_4
+; X64-NEXT:    jmp .LBB6_3
 ; X64-NEXT:  .LBB6_6: # %bb3
+; X64-NEXT:    cmpq $.LBB6_6, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $42, %eax
-; X64-NEXT:    jmp .LBB6_4
+; X64-NEXT:    jmp .LBB6_3
 ; X64-NEXT:  .LBB6_7: # %bb5
+; X64-NEXT:    cmpq $.LBB6_7, %rdx
+; X64-NEXT:    cmovneq %rax, %rcx
 ; X64-NEXT:    movl $11, %eax
-; X64-NEXT:  .LBB6_4: # %bb1
+; X64-NEXT:  .LBB6_3: # %bb0
 ; X64-NEXT:    shlq $47, %rcx
 ; X64-NEXT:    orq %rcx, %rsp
 ; X64-NEXT:    retq
@@ -389,3 +413,140 @@
 bb5:
   ret i32 11
 }
+
+; This function's switch is crafted to trigger jump-table lowering in the x86
+; backend so that we can test how the exact jump table lowering behaves, but
+; also arranges for fallthroughs from case to case to ensure that this pattern
+; too can be handled.
+define i32 @test_switch_jumptable_fallthrough(i32 %idx, i32* %a.ptr, i32* %b.ptr, i32* %c.ptr, i32* %d.ptr) nounwind {
+; X64-LABEL: test_switch_jumptable_fallthrough:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    movq %rsp, %r9
+; X64-NEXT:    movq $-1, %r10
+; X64-NEXT:    sarq $63, %r9
+; X64-NEXT:    cmpl $3, %edi
+; X64-NEXT:    ja .LBB7_2
+; X64-NEXT:  # %bb.1: # %entry
+; X64-NEXT:    cmovaq %r10, %r9
+; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    movl %edi, %esi
+; X64-NEXT:    movq .LJTI7_0(,%rsi,8), %rsi
+; X64-NEXT:    orq %r9, %rsi
+; X64-NEXT:    jmpq *%rsi
+; X64-NEXT:  .LBB7_2: # %bb0
+; X64-NEXT:    cmovbeq %r10, %r9
+; X64-NEXT:    movl (%rsi), %eax
+; X64-NEXT:    orl %r9d, %eax
+; X64-NEXT:    movq $.LBB7_3, %rsi
+; X64-NEXT:  .LBB7_3: # %bb1
+; X64-NEXT:    cmpq $.LBB7_3, %rsi
+; X64-NEXT:    cmovneq %r10, %r9
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    orl %r9d, %eax
+; X64-NEXT:    movq $.LBB7_4, %rsi
+; X64-NEXT:  .LBB7_4: # %bb2
+; X64-NEXT:    cmpq $.LBB7_4, %rsi
+; X64-NEXT:    cmovneq %r10, %r9
+; X64-NEXT:    addl (%rcx), %eax
+; X64-NEXT:    orl %r9d, %eax
+; X64-NEXT:    movq $.LBB7_5, %rsi
+; X64-NEXT:  .LBB7_5: # %bb3
+; X64-NEXT:    cmpq $.LBB7_5, %rsi
+; X64-NEXT:    cmovneq %r10, %r9
+; X64-NEXT:    addl (%r8), %eax
+; X64-NEXT:    orl %r9d, %eax
+; X64-NEXT:    movq $.LBB7_6, %rsi
+; X64-NEXT:  .LBB7_6: # %bb4
+; X64-NEXT:    cmpq $.LBB7_6, %rsi
+; X64-NEXT:    cmovneq %r10, %r9
+; X64-NEXT:    shlq $47, %r9
+; X64-NEXT:    orq %r9, %rsp
+; X64-NEXT:    retq
+;
+; X64-RETPOLINE-LABEL: test_switch_jumptable_fallthrough:
+; X64-RETPOLINE:       # %bb.0: # %entry
+; X64-RETPOLINE-NEXT:    movq %rsp, %r9
+; X64-RETPOLINE-NEXT:    movq $-1, %r10
+; X64-RETPOLINE-NEXT:    sarq $63, %r9
+; X64-RETPOLINE-NEXT:    xorl %eax, %eax
+; X64-RETPOLINE-NEXT:    cmpl $1, %edi
+; X64-RETPOLINE-NEXT:    jg .LBB8_5
+; X64-RETPOLINE-NEXT:  # %bb.1: # %entry
+; X64-RETPOLINE-NEXT:    cmovgq %r10, %r9
+; X64-RETPOLINE-NEXT:    testl %edi, %edi
+; X64-RETPOLINE-NEXT:    je .LBB8_2
+; X64-RETPOLINE-NEXT:  # %bb.3: # %entry
+; X64-RETPOLINE-NEXT:    cmoveq %r10, %r9
+; X64-RETPOLINE-NEXT:    cmpl $1, %edi
+; X64-RETPOLINE-NEXT:    jne .LBB8_8
+; X64-RETPOLINE-NEXT:  # %bb.4:
+; X64-RETPOLINE-NEXT:    cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT:    jmp .LBB8_10
+; X64-RETPOLINE-NEXT:  .LBB8_5: # %entry
+; X64-RETPOLINE-NEXT:    cmovleq %r10, %r9
+; X64-RETPOLINE-NEXT:    cmpl $2, %edi
+; X64-RETPOLINE-NEXT:    je .LBB8_6
+; X64-RETPOLINE-NEXT:  # %bb.7: # %entry
+; X64-RETPOLINE-NEXT:    cmoveq %r10, %r9
+; X64-RETPOLINE-NEXT:    cmpl $3, %edi
+; X64-RETPOLINE-NEXT:    jne .LBB8_8
+; X64-RETPOLINE-NEXT:  # %bb.13:
+; X64-RETPOLINE-NEXT:    cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT:    jmp .LBB8_12
+; X64-RETPOLINE-NEXT:  .LBB8_8:
+; X64-RETPOLINE-NEXT:    cmoveq %r10, %r9
+; X64-RETPOLINE-NEXT:    movl (%rsi), %eax
+; X64-RETPOLINE-NEXT:    orl %r9d, %eax
+; X64-RETPOLINE-NEXT:    jmp .LBB8_9
+; X64-RETPOLINE-NEXT:  .LBB8_2:
+; X64-RETPOLINE-NEXT:    cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT:  .LBB8_9: # %bb1
+; X64-RETPOLINE-NEXT:    addl (%rdx), %eax
+; X64-RETPOLINE-NEXT:    orl %r9d, %eax
+; X64-RETPOLINE-NEXT:  .LBB8_10: # %bb2
+; X64-RETPOLINE-NEXT:    addl (%rcx), %eax
+; X64-RETPOLINE-NEXT:    orl %r9d, %eax
+; X64-RETPOLINE-NEXT:    jmp .LBB8_11
+; X64-RETPOLINE-NEXT:  .LBB8_6:
+; X64-RETPOLINE-NEXT:    cmovneq %r10, %r9
+; X64-RETPOLINE-NEXT:  .LBB8_11: # %bb3
+; X64-RETPOLINE-NEXT:    addl (%r8), %eax
+; X64-RETPOLINE-NEXT:    orl %r9d, %eax
+; X64-RETPOLINE-NEXT:  .LBB8_12: # %bb4
+; X64-RETPOLINE-NEXT:    shlq $47, %r9
+; X64-RETPOLINE-NEXT:    orq %r9, %rsp
+; X64-RETPOLINE-NEXT:    retq
+entry:
+  switch i32 %idx, label %bb0 [
+    i32 0, label %bb1
+    i32 1, label %bb2
+    i32 2, label %bb3
+    i32 3, label %bb4
+  ]
+
+bb0:
+  %a = load i32, i32* %a.ptr
+  br label %bb1
+
+bb1:
+  %b.phi = phi i32 [ 0, %entry ], [ %a, %bb0 ]
+  %b = load i32, i32* %b.ptr
+  %b.sum = add i32 %b.phi, %b
+  br label %bb2
+
+bb2:
+  %c.phi = phi i32 [ 0, %entry ], [ %b.sum, %bb1 ]
+  %c = load i32, i32* %c.ptr
+  %c.sum = add i32 %c.phi, %c
+  br label %bb3
+
+bb3:
+  %d.phi = phi i32 [ 0, %entry ], [ %c.sum, %bb2 ]
+  %d = load i32, i32* %d.ptr
+  %d.sum = add i32 %d.phi, %d
+  br label %bb4
+
+bb4:
+  %e.phi = phi i32 [ 0, %entry ], [ %d.sum, %bb3 ]
+  ret i32 %e.phi
+}
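
For reference, the hardening pattern this patch emits at each target of an indirect branch reduces to the short sketch below. This is only an illustration distilled from the X64 check lines above (register assignments follow those checks, and .LBB_target stands in for the target block's real label); it is not part of the patch and not additional test output:

      # Block ending in the indirect branch: the target address has already
      # been hardened (or'ed with the predicate state) and lives in %rdx.
      jmpq *%rdx
    .LBB_target:
      # At the top of every block the branch can reach, compare the address
      # that was actually branched through against this block's own address...
      cmpq $.LBB_target, %rdx
      # ...and poison the predicate state in %rcx (cmov from the all-ones
      # value in %rax) if we arrived here via a mispredicted target.
      cmovneq %rax, %rcx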