Index: llvm/lib/Analysis/IRSimilarityIdentifier.cpp =================================================================== --- llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -459,6 +459,18 @@ // that both of these instructions are not nullptrs. FirstInst = FirstInstIt; LastInst = LastInstIt; + + // Add the basic blocks contained in the set into the global value numbering. + DenseSet BBSet; + getBasicBlocks(BBSet); + for (BasicBlock *BB : BBSet) { + if (ValueToNumber.find(BB) != ValueToNumber.end()) + continue; + + ValueToNumber.try_emplace(BB, LocalValNumber); + NumberToValue.try_emplace(LocalValNumber, BB); + LocalValNumber++; + } } bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, @@ -1007,6 +1019,41 @@ CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); } + + DenseSet BBSet; + getBasicBlocks(BBSet); + // Find canonical numbers for the BasicBlocks in the current candididate. + // This is done by finding the corresponding value for the first instruction + // in the block in the current candidate, finding the matching value in the + // source candidate. Then by finding the parent of this value, use the + // canonical number of the block in the source candidate for the canonical + // number in the current candidate. + for (BasicBlock *BB : BBSet) { + unsigned ThisBBGVN = ValueToNumber.find(BB)->second; + + // We can skip the BasicBlock if the caonical numbering has already been + // found in a separate instruction. + if (NumberToCanonNum.find(ThisBBGVN) != NumberToCanonNum.end()) + continue; + + Value *V; + // If the basic block is the starting block, then the shared instruction may + // not be the first instruction in the block, it will be the first + // instruction in the similarity region. + if (BB == getStartBB()) + V = frontInstruction(); + else + V = &BB->front(); + unsigned ThisGVN = *getGVN(V); + unsigned ThisCanonNum = *getCanonicalNum(ThisGVN); + unsigned SourceGVN = *SourceCand.fromCanonicalNum(ThisCanonNum); + Value *SourceV = *SourceCand.fromGVN(SourceGVN); + BasicBlock *SourceBB = cast(SourceV)->getParent(); + unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB); + unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN); + CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, ThisBBGVN)); + NumberToCanonNum.insert(std::make_pair(ThisBBGVN, SourceCanonBBGVN)); + } } void IRSimilarityCandidate::createCanonicalMappingFor( Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -1173,6 +1173,25 @@ OGVN = Cand.getCanonicalNum(GVN); assert(OGVN.hasValue() && "No GVN found for incoming value?"); PHIGVNs.push_back(*OGVN); + + // Find the incoming block and use the canonical numbering as well to define + // the hash for the PHINode. + OGVN = Cand.getGVN(IncomingBlock); + + // If there is no number for the incoming block, it is becaause we have + // split the candidate basic blocks. So we use the previous block that it + // was split from to find the valid global value numbering for the PHINode. + if (!OGVN.hasValue()) { + assert(Cand.getStartBB() == IncomingBlock && + "Unknown basic block used in exit path PHINode."); + + BasicBlock *PrevBlock = IncomingBlock->getSinglePredecessor(); + OGVN = Cand.getGVN(PrevBlock); + } + GVN = OGVN.getValue(); + OGVN = Cand.getCanonicalNum(GVN); + assert(OGVN.hasValue() && "No GVN found for incoming block?"); + PHIGVNs.push_back(*OGVN); } // Now that we have the GVNs for the incoming values, we are going to combine Index: llvm/test/Transforms/IROutliner/phi-node-exit-path-order.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/phi-node-exit-path-order.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; A PHINode defines the global value number of a split phi node for +; an exit paths based on the canonical number for the incoming values, and +; the canonical number for the basic block. This checks that we accurately +; capture a different numbering for the same incoming value but with different +; blocks. + +define void @func1(i32 %0, i32 %1) local_unnamed_addr #0 { +bb1: + br label %bb5 + +bb2: + %a = add i32 %0, %1 + %b = add i32 %0, %1 + %c = icmp eq i32 %b, 1 + br i1 %c, label %bb5, label %bb3 + +bb3: + %d = add i32 %0, %1 + br label %bb5 + +bb4: + %e = sub i32 %0, %1 + br label %bb2 + +bb5: + %f = phi i32 [ 0, %bb1 ], [ 1, %bb2 ], [ 1, %bb3 ] + ret void +} + +define void @func2(i32 %0, i32 %1) local_unnamed_addr #0 { +bb1: + br label %bb5 + +bb2: + %a = sub i32 %0, %1 + %b = add i32 %0, %1 + %c = icmp eq i32 %b, 1 + br i1 %c, label %bb5, label %bb3 + +bb3: + %d = add i32 %0, %1 + br label %bb5 + +bb4: + %e = add i32 %0, %1 + br label %bb2 + +bb5: + %f = phi i32 [ 0, %bb1 ], [ 1, %bb3 ], [ 1, %bb2 ] + ret void +} +; CHECK-LABEL: @func1( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[F_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[A:%.*]] = add i32 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[F_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[TMP0]], i32 [[TMP1]], i32* [[F_CE_LOC]], i32 0) +; CHECK-NEXT: [[F_CE_RELOAD:%.*]] = load i32, i32* [[F_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb4: +; CHECK-NEXT: [[E:%.*]] = sub i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[F:%.*]] = phi i32 [ 0, [[BB1:%.*]] ], [ [[F_CE_RELOAD]], [[BB2]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @func2( +; CHECK-NEXT: bb1: +; CHECK-NEXT: [[F_CE_LOC:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[A:%.*]] = sub i32 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[F_CE_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[TMP0]], i32 [[TMP1]], i32* [[F_CE_LOC]], i32 1) +; CHECK-NEXT: [[F_CE_RELOAD:%.*]] = load i32, i32* [[F_CE_LOC]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: br label [[BB5]] +; CHECK: bb4: +; CHECK-NEXT: [[E:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[F:%.*]] = phi i32 [ 0, [[BB1:%.*]] ], [ [[F_CE_RELOAD]], [[BB2]] ] +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define internal void @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[BB2_TO_OUTLINE:%.*]] +; CHECK: bb2_to_outline: +; CHECK-NEXT: [[B:%.*]] = add i32 [[TMP0:%.*]], [[TMP1:%.*]] +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[B]], 1 +; CHECK-NEXT: br i1 [[C]], label [[BB5_SPLIT:%.*]], label [[BB3:%.*]] +; CHECK: bb3: +; CHECK-NEXT: [[D:%.*]] = add i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br label [[BB5_SPLIT]] +; CHECK: bb5.split: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ 1, [[BB3]] ], [ 1, [[BB2_TO_OUTLINE]] ] +; CHECK-NEXT: [[F_CE:%.*]] = phi i32 [ 1, [[BB2_TO_OUTLINE]] ], [ 1, [[BB3]] ] +; CHECK-NEXT: br label [[BB5_EXITSTUB:%.*]] +; CHECK: bb5.exitStub: +; CHECK-NEXT: switch i32 [[TMP3:%.*]], label [[FINAL_BLOCK_0:%.*]] [ +; CHECK-NEXT: i32 0, label [[OUTPUT_BLOCK_0_0:%.*]] +; CHECK-NEXT: i32 1, label [[OUTPUT_BLOCK_1_0:%.*]] +; CHECK-NEXT: ] +; CHECK: output_block_0_0: +; CHECK-NEXT: store i32 [[F_CE]], i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: output_block_1_0: +; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[FINAL_BLOCK_0]] +; CHECK: final_block_0: +; CHECK-NEXT: ret void +;