Index: lib/CodeGen/BranchRelaxation.cpp =================================================================== --- lib/CodeGen/BranchRelaxation.cpp +++ lib/CodeGen/BranchRelaxation.cpp @@ -78,7 +78,8 @@ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB); - MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI); + MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB); void adjustBlockOffsets(MachineBasicBlock &MBB); bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const; @@ -116,6 +117,7 @@ unsigned Num = MBB.getNumber(); assert(BlockInfo[Num].Offset % (1u << Align) == 0); assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset); + assert(BlockInfo[Num].Size == computeBlockSize(MBB)); PrevNum = Num; } #endif @@ -205,10 +207,8 @@ /// Split the basic block containing MI into two blocks, which are joined by /// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. -/// NOTE: Successor list of the original BB is out of date after this function, -/// and must be updated by the caller! Other transforms follow using this -/// utility function, so no point updating now rather than waiting. -MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI) { +MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, + MachineBasicBlock *DestBB) { MachineBasicBlock *OrigBB = MI.getParent(); // Create a new MBB for the code after the OrigBB. @@ -228,6 +228,16 @@ // Insert an entry into BlockInfo to align it properly with the block numbers. BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo()); + + NewBB->transferSuccessors(OrigBB); + OrigBB->addSuccessor(NewBB); + OrigBB->addSuccessor(DestBB); + + // Cleanup potential unconditional branch to successor block. + // Note that updateTerminator may change the size of the blocks. + NewBB->updateTerminator(); + OrigBB->updateTerminator(); + // Figure out how large the OrigBB is. As the first half of the original // block, it cannot contain a tablejump. The size includes // the new jump we added. (It should be possible to do this without @@ -386,12 +396,9 @@ DebugLoc DL = MI.getDebugLoc(); MI.eraseFromParent(); - - // insertUnconditonalBranch may have inserted a new block. - BlockInfo[MBB->getNumber()].Size += TII->insertIndirectBranch( + BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch( *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get()); - computeBlockSize(*BranchBB); adjustBlockOffsets(*MBB); return true; } @@ -440,14 +447,7 @@ // analyzable block. Split later terminators into a new block so // each one will be analyzable. - MachineBasicBlock *NewBB = splitBlockBeforeInstr(*Next); - NewBB->transferSuccessors(&MBB); - MBB.addSuccessor(NewBB); - MBB.addSuccessor(DestBB); - - // Cleanup potential unconditional branch to successor block. - NewBB->updateTerminator(); - MBB.updateTerminator(); + splitBlockBeforeInstr(*Next, DestBB); } else { fixupConditionalBranch(MI); ++NumConditionalRelaxed; Index: test/CodeGen/AMDGPU/branch-relaxation.ll =================================================================== --- test/CodeGen/AMDGPU/branch-relaxation.ll +++ test/CodeGen/AMDGPU/branch-relaxation.ll @@ -475,5 +475,59 @@ ret void } +; GCN-LABEL: {{^}}long_branch_hang: +; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6 +; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]] + +; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-( +; GCN: s_setpc_b64 + +; GCN-NEXT: [[LONG_BR_0]]: +; GCN: s_setpc_b64 + +; GCN-NEXT: [[LONG_BR_DEST0]]: +; GCN-DAG: v_cmp_lt_i32 +; GCN-DAG: v_cmp_gt_i32 +; GCN: s_cbranch_vccnz + +; GCN: s_setpc_b64 +; GCN: s_setpc_b64 + +; GCN: s_cmp_eq_u32 +; GCN-NEXT: s_cbranch_scc0 +; GCN: s_setpc_b64 + +; GCN: s_endpgm +define amdgpu_kernel void @long_branch_hang(i32 addrspace(1)* nocapture %arg, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i64 %arg5) #0 { +bb: + %tmp = icmp slt i32 %arg2, 9 + %tmp6 = icmp eq i32 %arg1, 0 + %tmp7 = icmp sgt i32 %arg4, 0 + %tmp8 = icmp sgt i32 %arg4, 5 + br i1 %tmp8, label %bb9, label %bb13 + +bb9: ; preds = %bb + %tmp10 = and i1 %tmp7, %tmp + %tmp11 = icmp slt i32 %arg3, %arg4 + %tmp12 = or i1 %tmp11, %tmp7 + br i1 %tmp12, label %bb19, label %bb14 + +bb13: ; preds = %bb + br i1 %tmp6, label %bb19, label %bb14 + +bb14: ; preds = %bb13, %bb9 + %tmp15 = icmp slt i32 %arg3, %arg4 + %tmp16 = or i1 %tmp15, %tmp + %tmp17 = and i1 %tmp6, %tmp16 + %tmp18 = zext i1 %tmp17 to i32 + br label %bb19 + +bb19: ; preds = %bb14, %bb13, %bb9 + %tmp20 = phi i32 [ undef, %bb9 ], [ undef, %bb13 ], [ %tmp18, %bb14 ] + %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %arg5 + store i32 %tmp20, i32 addrspace(1)* %tmp21, align 4 + ret void +} + attributes #0 = { nounwind } attributes #1 = { nounwind readnone }