diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -1095,26 +1095,39 @@ /// /// \param Region - The region that \p PN is an output for. /// \param PN - The PHINode we are analyzing. +/// \param Blocks - The blocks for the region we are analyzing. /// \param AggArgIdx - The argument \p PN will be stored into. /// \returns An optional holding the assigned canonical number, or None if /// there is some attribute of the PHINode blocking it from being used. static Optional getGVNForPHINode(OutlinableRegion &Region, - PHINode *PN, unsigned AggArgIdx) { + PHINode *PN, + DenseSet &Blocks, + unsigned AggArgIdx) { OutlinableGroup &Group = *Region.Parent; IRSimilarityCandidate &Cand = *Region.Candidate; BasicBlock *PHIBB = PN->getParent(); CanonList PHIGVNs; - for (Value *Incoming : PN->incoming_values()) { - // If we cannot find a GVN, this means that the input to the PHINode is - // not included in the region we are trying to analyze, meaning, that if - // it was outlined, we would be adding an extra input. We ignore this - // case for now, and so ignore the region. + Value *Incoming; + BasicBlock *IncomingBlock; + for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) { + Incoming = PN->getIncomingValue(Idx); + IncomingBlock = PN->getIncomingBlock(Idx); + // If we cannot find a GVN, and the incoming block is included in the region + // this means that the input to the PHINode is not included in the region we + // are trying to analyze, meaning, that if it was outlined, we would be + // adding an extra input. We ignore this case for now, and so ignore the + // region. Optional OGVN = Cand.getGVN(Incoming); - if (!OGVN.hasValue()) { + if (!OGVN.hasValue() && (Blocks.find(IncomingBlock) != Blocks.end())) { Region.IgnoreRegion = true; return None; } + // If the incoming block isn't in the region, we don't have to worry about + // this incoming value. + if (Blocks.find(IncomingBlock) == Blocks.end()) + continue; + // Collect the canonical numbers of the values in the PHINode. unsigned GVN = OGVN.getValue(); OGVN = Cand.getCanonicalNum(GVN); @@ -1259,9 +1272,9 @@ // If two PHINodes have the same canonical values, but different aggregate // argument locations, then they will have distinct Canonical Values. - GVN = getGVNForPHINode(Region, PN, AggArgIdx); + GVN = getGVNForPHINode(Region, PN, BlocksInRegion, AggArgIdx); if (!GVN.hasValue()) - return; + return; } else { // If we do not have a PHINode we use the global value numbering for the // output value, to find the canonical number to add to the set of stored diff --git a/llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll b/llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/exit-block-phi-node-value-attribution.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; Make sure that each outlined region only analyzes on the incoming values in exit +; block phi nodes that come from the outlined region. Without this, more incoming +; values from the phi node would be considered if the incoming value is +; included in the outlined region. This is particularly likely to happen for +; constants. + +define void @f1() { +bb1: + %0 = add i32 1, 2 + %1 = add i32 3, 4 + %2 = add i32 5, 6 + %3 = add i32 7, 8 + br label %bb5 +bb2: + %4 = mul i32 5, 4 + br label %bb5 +placeholder: + %a = sub i32 5, 4 + br label %bb5 +bb3: + %5 = add i32 1, 2 + %6 = add i32 3, 4 + %7 = add i32 5, 6 + %8 = add i32 7, 8 + br label %bb5 +bb4: + %9 = mul i32 5, 4 + br label %bb5 + +placeholder1: + %b = add i32 5, 4 + ret void + +bb5: + %phinode = phi i32 [5, %placeholder], [5, %bb1], [5, %bb2], [4, %bb3], [4, %bb4] + ret void +} +; CHECK-LABEL: @f1( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[PHINODE_CE_LOC1:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[PHINODE_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[PHINODE_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[PHINODE_CE_LOC]], i32 0) +; CHECK-NEXT: [[PHINODE_CE_RELOAD:%.*]] = load i32, i32* [[PHINODE_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: placeholder: +; CHECK-NEXT: [[A:%.*]] = sub i32 5, 4 +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb3: +; CHECK-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[PHINODE_CE_LOC1]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[PHINODE_CE_LOC1]], i32 1) +; CHECK-NEXT: [[PHINODE_CE_RELOAD2:%.*]] = load i32, i32* [[PHINODE_CE_LOC1]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST3]]) +; CHECK-NEXT: br label [[BB5]] +; CHECK: placeholder1: +; CHECK-NEXT: [[B:%.*]] = add i32 5, 4 +; CHECK-NEXT: ret void +; CHECK: bb5: +; CHECK-NEXT: [[PHINODE:%.*]] = phi i32 [ 5, [[PLACEHOLDER:%.*]] ], [ [[PHINODE_CE_RELOAD]], [[BB1:%.*]] ], [ [[PHINODE_CE_RELOAD2]], [[BB3:%.*]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BB1_TO_OUTLINE:%.*]] +; CHECK: bb1_to_outline: +; CHECK-NEXT: [[TMP2:%.*]] = add i32 1, 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 3, 4 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 5, 6 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 7, 8 +; CHECK-NEXT: br label [[BB5_SPLIT:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[TMP6:%.*]] = mul i32 5, 4 +; CHECK-NEXT: br label [[BB5_SPLIT]] +; CHECK: bb5.split: +; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ 4, [[BB1_TO_OUTLINE]] ], [ 4, [[BB2:%.*]] ] +; CHECK-NEXT: [[PHINODE_CE:%.*]] = phi i32 [ 5, [[BB1_TO_OUTLINE]] ], [ 5, [[BB2]] ] +; CHECK-NEXT: br label [[BB5_EXITSTUB:%.*]] +; CHECK: bb5.exitStub: +; CHECK-NEXT: switch i32 [[TMP1:%.*]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[PHINODE_CE]], i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP0]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: final_block_0: +; CHECK-NEXT: ret void +;