diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1095,17 +1095,24 @@ // Update (liveout) uses of bonus instructions, // now that the bonus instruction has been cloned into predecessor. - SSAUpdater SSAUpdate; - SSAUpdate.Initialize(BonusInst.getType(), - (NewBonusInst->getName() + ".merge").str()); - SSAUpdate.AddAvailableValue(BB, &BonusInst); - SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst); + // Note that we expect to be in a block-closed SSA form for this to work! for (Use &U : make_early_inc_range(BonusInst.uses())) { auto *UI = cast(U.getUser()); - if (UI->getParent() != PredBlock) - SSAUpdate.RewriteUseAfterInsertions(U); - else // Use is in the same block as, and comes before, NewBonusInst. - SSAUpdate.RewriteUse(U); + auto *PN = dyn_cast(UI); + if (!PN) { + assert(UI->getParent() == BB && BonusInst.comesBefore(UI) && + "If the user is not a PHI node, then it should be in the same " + "block as, and come after, the original bonus instruction."); + continue; // Keep using the original bonus instruction. + } + // Is this the block-closed SSA form PHI node? + if (PN->getIncomingBlock(U) == BB) + continue; // Great, keep using the original bonus instruction. + // The only other alternative is a "use" when coming from + // the predecessor block - here we should refer to the cloned bonus instr. + assert(PN->getIncomingBlock(U) == PredBlock && + "Not in block-closed SSA form?"); + U.set(NewBonusInst); } } } @@ -3018,6 +3025,56 @@ LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); + // We want to duplicate all the bonus instructions in this block, + // and rewrite their uses, but in some cases with self-loops, + // the naive use rewrite approach won't work (will result in miscompilations). + // To avoid this problem, let's form block-closed SSA form. 
+ for (Instruction &BonusInst : + reverse(iterator_range(*BB))) { + auto IsBCSSAUse = [BB, &BonusInst](Use &U) { + auto *UI = cast(U.getUser()); + if (auto *PN = dyn_cast(UI)) + return PN->getIncomingBlock(U) == BB; + return UI->getParent() == BB && BonusInst.comesBefore(UI); + }; + + // Does this instruction require rewriting of uses? + if (all_of(BonusInst.uses(), IsBCSSAUse)) + continue; + + SSAUpdater SSAUpdate; + Type *Ty = BonusInst.getType(); + SmallVector BCSSAPHIs; + SSAUpdate.Initialize(Ty, BonusInst.getName()); + + // Into each successor block of BB, insert a PHI node, that receives + // the BonusInst when coming from its basic block, or poison otherwise. + for (BasicBlock *Succ : successors(BB)) { + // The block may have the same successor multiple times. Do it only once. + if (SSAUpdate.HasValueForBlock(Succ)) + continue; + BCSSAPHIs.emplace_back(PHINode::Create( + Ty, 0, BonusInst.getName() + ".bcssa", &Succ->front())); + PHINode *PN = BCSSAPHIs.back(); + for (BasicBlock *PredOfSucc : predecessors(Succ)) + PN->addIncoming(PredOfSucc == BB ? (Value *)&BonusInst + : PoisonValue::get(Ty), + PredOfSucc); + SSAUpdate.AddAvailableValue(Succ, PN); + } + + // And rewrite all uses that break block-closed SSA form. + for (Use &U : make_early_inc_range(BonusInst.uses())) + if (!IsBCSSAUse(U)) + SSAUpdate.RewriteUseAfterInsertions(U); + + // We might not have ended up needing PHIs in all of the succ blocks, + // drop the ones that are certainly unused, but don't bother otherwise. + for (PHINode *PN : BCSSAPHIs) + if (PN->use_empty()) + PN->eraseFromParent(); + } + IRBuilder<> Builder(PBI); // The builder is used to create instructions to eliminate the branch in BB. 
// If BB's terminator has !annotation metadata, add it to the new diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll --- a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest.ll @@ -134,6 +134,7 @@ ; CHECK: final_right: ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: br label [[COMMON_RET]] +; pred: %c0 = icmp ne i32* %p, null br i1 %c0, label %dispatch, label %final_right @@ -153,10 +154,19 @@ ; Drop dereferenceable on the parameter define void @one_pred_with_spec_call_deref(i8 %v0, i8 %v1, i32* %p) { -; CHECK-LABEL: one_pred_with_spec_call_deref -; CHECK-LABEL: pred: -; CHECK: %c0 = icmp ne i32* %p, null -; CHECK: %x = call i32 @speculate_call(i32* %p) +; CHECK-LABEL: @one_pred_with_spec_call_deref( +; CHECK-NEXT: pred: +; CHECK-NEXT: [[C0:%.*]] = icmp ne i32* [[P:%.*]], null +; CHECK-NEXT: [[X:%.*]] = call i32 @speculate_call(i32* [[P]]) +; CHECK-NEXT: [[C1:%.*]] = icmp eq i8 [[V1:%.*]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; CHECK-NEXT: br i1 [[OR_COND]], label [[COMMON_RET:%.*]], label [[FINAL_RIGHT:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: ret void +; CHECK: final_right: +; CHECK-NEXT: call void @sideeffect0() +; CHECK-NEXT: br label [[COMMON_RET]] +; pred: %c0 = icmp ne i32* %p, null br i1 %c0, label %dispatch, label %final_right @@ -824,7 +834,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_MERGE:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] +; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_BCSSA1:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = call i1 @gen1() ; CHECK-NEXT: br i1 [[C]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] ; CHECK: for.inc: @@ -839,7 +849,7 @@ ; CHECK-NEXT: [[OR_COND:%.*]] = select 
i1 [[C2_NOT]], i1 true, i1 [[CMP_NOT]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END_LOOPEXIT]], label [[FOR_BODYTHREAD_PRE_SPLIT]] ; CHECK: for.bodythread-pre-split: -; CHECK-NEXT: [[DEC_MERGE]] = phi i8 [ [[DEC]], [[IF_THEN]] ], [ [[DEC_OLD]], [[FOR_INC]] ] +; CHECK-NEXT: [[DEC_BCSSA1]] = phi i8 [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC]], [[IF_THEN]] ] ; CHECK-NEXT: call void @sideeffect0() ; CHECK-NEXT: br label [[FOR_BODY]] ; CHECK: if.end.loopexit: @@ -875,7 +885,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_MERGE:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] +; CHECK-NEXT: [[COUNTDOWN:%.*]] = phi i8 [ 8, [[ENTRY:%.*]] ], [ [[DEC_BCSSA1:%.*]], [[FOR_BODYTHREAD_PRE_SPLIT:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = call i1 @gen1() ; CHECK-NEXT: br i1 [[C]], label [[FOR_INC:%.*]], label [[IF_THEN:%.*]] ; CHECK: for.inc: @@ -890,7 +900,7 @@ ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C2_NOT]], i1 true, i1 [[CMP_NOT]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_END_LOOPEXIT]], label [[FOR_BODYTHREAD_PRE_SPLIT]] ; CHECK: for.bodythread-pre-split: -; CHECK-NEXT: [[DEC_MERGE]] = phi i8 [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC_MERGE]], [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK:%.*]] ], [ [[DEC]], [[IF_THEN]] ] +; CHECK-NEXT: [[DEC_BCSSA1]] = phi i8 [ poison, [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK:%.*]] ], [ [[DEC_OLD]], [[FOR_INC]] ], [ [[DEC]], [[IF_THEN]] ] ; CHECK-NEXT: [[SHOULD_LOOPBACK:%.*]] = phi i1 [ true, [[FOR_INC]] ], [ false, [[FOR_BODYTHREAD_PRE_SPLIT_LOOPBACK]] ], [ true, [[IF_THEN]] ] ; CHECK-NEXT: [[DO_LOOPBACK:%.*]] = and i1 [[SHOULD_LOOPBACK]], [[ENABLE_LOOPBACK:%.*]] ; CHECK-NEXT: call void @sideeffect0() @@ -995,8 +1005,8 @@ ; CHECK-NEXT: [[TOBOOL_OLD:%.*]] = icmp ne i16 [[DOTOLD]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_OLD]], label [[LAND_RHS:%.*]], label [[FOR_END:%.*]] ; CHECK: land.rhs: -; CHECK-NEXT: [[DOTMERGE:%.*]] = phi i16 [ [[TMP0:%.*]], [[LAND_RHS]] ], [ 
[[DOTOLD]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[DOTMERGE]], 0 +; CHECK-NEXT: [[DOTBCSSA:%.*]] = phi i16 [ [[DOTOLD]], [[ENTRY:%.*]] ], [ [[TMP0:%.*]], [[LAND_RHS]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[DOTBCSSA]], 0 ; CHECK-NEXT: [[TMP0]] = load i16, i16* @global_pr49510, align 1 ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 [[TMP0]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 [[TOBOOL]], i1 false @@ -1020,10 +1030,6 @@ ret void } -; FIXME: -; This is a miscompile if we replace a phi incoming value -; with an updated loaded value *after* it was stored. - @global_pr51125 = global i32 1, align 4 define i32 @pr51125() { @@ -1033,15 +1039,15 @@ ; CHECK-NEXT: [[ISZERO_OLD:%.*]] = icmp eq i32 [[LD_OLD]], 0 ; CHECK-NEXT: br i1 [[ISZERO_OLD]], label [[EXIT:%.*]], label [[L2:%.*]] ; CHECK: L2: -; CHECK-NEXT: [[LD_MERGE:%.*]] = phi i32 [ [[LD:%.*]], [[L2]] ], [ [[LD_OLD]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LD_BCSSA1:%.*]] = phi i32 [ [[LD_OLD]], [[ENTRY:%.*]] ], [ [[LD:%.*]], [[L2]] ] ; CHECK-NEXT: store i32 -1, i32* @global_pr51125, align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LD_MERGE]], -1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LD_BCSSA1]], -1 ; CHECK-NEXT: [[LD]] = load i32, i32* @global_pr51125, align 4 ; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[LD]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP]], i1 true, i1 [[ISZERO]] ; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT]], label [[L2]] ; CHECK: exit: -; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[LD]], [[L2]] ], [ [[LD_OLD]], [[ENTRY]] ] +; CHECK-NEXT: [[R:%.*]] = phi i32 [ [[LD_BCSSA1]], [[L2]] ], [ [[LD_OLD]], [[ENTRY]] ] ; CHECK-NEXT: ret i32 [[R]] ; entry: