Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -1561,11 +1561,14 @@ /// \p PN in. /// \param OutputMappings [in] - The mapping of output values from outlined /// region to their original values. +/// \param UsedPhis [in, out] - The PHINodes in the block that have already been +/// matched. /// \return the newly found or created PHINode in \p OverallPhiBlock. static PHINode* findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region, BasicBlock *OverallPhiBlock, - const DenseMap &OutputMappings) { + const DenseMap &OutputMappings, + DenseSet &UsedPHIs) { OutlinableGroup &Group = *Region.Parent; DenseSet PNCanonNums; @@ -1581,14 +1584,20 @@ // Find the Canonical Numbering for each PHINode, if it matches, we replace // the uses of the PHINode we are searching for, with the found PHINode. for (PHINode &CurrPN : OverallPhiBlock->phis()) { + // If this PHINode has already been matched to another PHINode to be merged, + // we skip it. + if (UsedPHIs.find(&CurrPN) != UsedPHIs.end()) + continue; + CurrentCanonNums.clear(); findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums, /* ReplacedWithOutlinedCall = */ true); - if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) { return CurrentCanonNums.contains(CanonNum); - })) + })) { + UsedPHIs.insert(&CurrPN); return &CurrPN; + } } // If we've made it here, it means we weren't able to replace the PHINode, so @@ -1648,6 +1657,7 @@ if (FirstFunction) DominatingFunction = Group.OutlinedFunction; DominatorTree DT(*DominatingFunction); + DenseSet UsedPHIs; for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size(); ArgIdx++) { @@ -1747,8 +1757,8 @@ // For our PHINode, we find the combined canonical numbering, and // attempt to find a matching PHINode in the overall PHIBlock. If we // cannot, we copy the PHINode and move it into this new block. - PHINode *NewPN = - findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings); + PHINode *NewPN = findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, + OutputMappings, UsedPHIs); NewI->setOperand(0, NewPN); } Index: llvm/test/Transforms/IROutliner/duplicate-merging-phis.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/duplicate-merging-phis.ll @@ -0,0 +1,117 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Make sure that when we merge phi nodes, we do not merge two different PHINodes +; as the same phi node. + +define void @f1() { +bb1: + %0 = add i32 1, 2 + %1 = add i32 3, 4 + %2 = add i32 5, 6 + %3 = add i32 7, 8 + br label %bb5 +bb2: + %4 = mul i32 5, 4 + br label %bb5 + +placeholder: + %a = sub i32 5, 4 + ret void + +bb5: + %phinode = phi i32 [5, %bb1], [5, %bb2] + %phinode1 = phi i32 [5, %bb1], [5, %bb2] + ret void +} + +define void @f2() { +bb1: + %0 = add i32 1, 2 + %1 = add i32 3, 4 + %2 = add i32 5, 6 + %3 = add i32 7, 8 + br label %bb5 +bb2: + %4 = mul i32 5, 4 + br label %bb5 + +placeholder: + %a = sub i32 5, 4 + ret void + +bb5: + %phinode = phi i32 [5, %bb1], [5, %bb2] + %phinode1 = phi i32 [5, %bb1], [5, %bb2] + ret void +} +; CHECK-LABEL: @f1( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[PHINODE1_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[PHINODE_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[PHINODE_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[PHINODE1_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[PHINODE_CE_LOC]], i32* [[PHINODE1_CE_LOC]]) +; CHECK-NEXT: [[PHINODE_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE_CE_LOC]], align 4 +; CHECK-NEXT: [[PHINODE1_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE1_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[BB5:%.*]], label [[BB1_AFTER_OUTLINE:%.*]] +; CHECK: bb1_after_outline: +; CHECK-NEXT: ret void +; CHECK: bb5: +; CHECK-NEXT: [[PHINODE:%.*]] = phi i32 [ [[PHINODE_CE_RELOAD]], [[BB1:%.*]] ] +; CHECK-NEXT: [[PHINODE1:%.*]] = phi i32 [ [[PHINODE1_CE_RELOAD]], [[BB1]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @f2( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[PHINODE1_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[PHINODE_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[PHINODE_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[PHINODE1_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[PHINODE_CE_LOC]], i32* [[PHINODE1_CE_LOC]]) +; CHECK-NEXT: [[PHINODE_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE_CE_LOC]], align 4 +; CHECK-NEXT: [[PHINODE1_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE1_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: br i1 [[TARGETBLOCK]], label [[BB5:%.*]], label [[BB1_AFTER_OUTLINE:%.*]] +; CHECK: bb1_after_outline: +; CHECK-NEXT: ret void +; CHECK: bb5: +; CHECK-NEXT: [[PHINODE:%.*]] = phi i32 [ [[PHINODE_CE_RELOAD]], [[BB1:%.*]] ] +; CHECK-NEXT: [[PHINODE1:%.*]] = phi i32 [ [[PHINODE1_CE_RELOAD]], [[BB1]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal i1 @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BB1_TO_OUTLINE:%.*]] +; CHECK: bb1_to_outline: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 1, 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 3, 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 5, 6 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 7, 8 +; CHECK-NEXT: br label [[BB5_SPLIT:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 5, 4 +; CHECK-NEXT: br label [[BB5_SPLIT]] +; CHECK: placeholder: +; CHECK-NEXT: [[A:%.*]] = sub i32 5, 4 +; CHECK-NEXT: br label [[BB1_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: bb5.split: +; CHECK-NEXT: [[PHINODE_CE:%.*]] = phi i32 [ 5, [[BB1_TO_OUTLINE]] ], [ 5, [[BB2:%.*]] ] +; CHECK-NEXT: [[PHINODE1_CE:%.*]] = phi i32 [ 5, [[BB1_TO_OUTLINE]] ], [ 5, [[BB2]] ] +; CHECK-NEXT: br label [[BB5_EXITSTUB:%.*]] +; CHECK: bb5.exitStub: +; CHECK-NEXT: store i32 [[PHINODE_CE]], i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 [[PHINODE1_CE]], i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: ret i1 true +; CHECK: bb1_after_outline.exitStub: +; CHECK-NEXT: ret i1 false +;