Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -126,6 +126,10 @@ void OutlinableRegion::splitCandidate() { assert(!CandidateSplit && "Candidate already split!"); + if (Candidate->end()->Inst != + Candidate->backInstruction()->getNextNonDebugInstruction()) + return; + Instruction *StartInst = (*Candidate->begin()).Inst; Instruction *EndInst = (*Candidate->end()).Inst; assert(StartInst && EndInst && "Expected a start and end instruction?"); @@ -1574,6 +1578,9 @@ // Break the outlinable region out of its parent BasicBlock into its own // BasicBlocks (see function implementation). OS->splitCandidate(); + if (!OS->CandidateSplit) + continue; + std::vector BE = {OS->StartBB}; OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, Index: llvm/test/Transforms/IROutliner/outlining-bitcasts.ll =================================================================== --- llvm/test/Transforms/IROutliner/outlining-bitcasts.ll +++ llvm/test/Transforms/IROutliner/outlining-bitcasts.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that an extra output is not added when there is a bitcast @@ -12,15 +12,6 @@ declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) define void @outline_bitcast_base() { -; CHECK-LABEL: @outline_bitcast_base( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* 
[[D]]) -; CHECK-NEXT: ret void -; entry: %a = alloca i32, align 4 %b = alloca i32, align 4 @@ -37,25 +28,6 @@ } define void @outline_bitcast_removed() { -; CHECK-LABEL: @outline_bitcast_removed( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 -; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]]) -; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) -; CHECK-NEXT: [[AM:%.*]] = load i32, i32* [[B]], align 4 -; CHECK-NEXT: [[BM:%.*]] = load i32, i32* [[A]], align 4 -; CHECK-NEXT: [[CM:%.*]] = load i32, i32* [[C]], align 4 -; CHECK-NEXT: [[AS:%.*]] = add i32 [[AM]], [[BM]] -; CHECK-NEXT: [[BS:%.*]] = add i32 [[BM]], [[AM]] -; CHECK-NEXT: [[CS:%.*]] = add i32 [[BM]], [[CM]] -; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[D]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) -; CHECK-NEXT: ret void -; entry: %a = alloca i32, align 4 %b = alloca i32, align 4 @@ -72,34 +44,92 @@ %am = load i32, i32* %b %bm = load i32, i32* %a %cm = load i32, i32* %c - %as = add i32 %am, %bm - %bs = add i32 %bm, %am - %cs = add i32 %bm, %cm call void @llvm.lifetime.end.p0i8(i64 -1, i8* %X) ret void } +; The first bitcast is moved down to lifetime start, and, since the original +; endpoint does not match the new endpoint, we cannot extract and outline the +; second bitcast and set of adds. 
Outlining only occurs in this case due to +; the lack of a cost model + define void @outline_bitcast_base2(i32 %a, i32 %b, i32 %c) { -; CHECK-LABEL: @outline_bitcast_base2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[AL:%.*]] = add i32 [[A:%.*]], [[B:%.*]] -; CHECK-NEXT: [[BL:%.*]] = add i32 [[B]], [[A]] -; CHECK-NEXT: [[CL:%.*]] = add i32 [[B]], [[C:%.*]] -; CHECK-NEXT: ret void -; entry: + %d = alloca i32, align 4 + %X = bitcast i32* %d to i8* %al = add i32 %a, %b %bl = add i32 %b, %a %cl = add i32 %b, %c + %buffer = mul i32 %a, %b + %Y = bitcast i32* %d to i8* + %am = add i32 %a, %b + %bm = add i32 %b, %a + %cm = add i32 %b, %c + call void @llvm.lifetime.start.p0i8(i64 -1, i8* %X) + call void @llvm.lifetime.end.p0i8(i64 -1, i8* %X) ret void } -; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]]) -; CHECK: entry_to_outline: -; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 -; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 -; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 -; CHECK-NEXT: [[X:%.*]] = bitcast i32* [[ARG3]] to i8* -; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 -; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 -; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4 +; CHECK-LABEL: @outline_bitcast_base( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_bitcast_removed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], 
i32* [[D]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[AM:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[BM:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[CM:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: @outline_bitcast_base2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outlined_ir_func_1(i32* [[D]], i32 [[A:%.*]], i32 [[B:%.*]], i32 [[C:%.*]]) +; CHECK-NEXT: [[BUFFER:%.*]] = mul i32 [[A]], [[B]] +; CHECK-NEXT: [[Y:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: [[AM:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: [[BM:%.*]] = add i32 [[B]], [[A]] +; CHECK-NEXT: [[CM:%.*]] = add i32 [[B]], [[C]] +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: ret void +; +; +; CHECK: @outlined_ir_func_0(i32* [[TMP0:%.*]], i32* [[TMP1:%.*]], i32* [[TMP2:%.*]], i32* [[TMP3:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 3, i32* [[TMP1:%.*]], align 4 +; CHECK-NEXT: store i32 4, i32* [[TMP2:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = bitcast i32* [[TMP3:%.*]] to i8* +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[TMP1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; +; +; CHECK: @outlined_ir_func_1(i32* [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[TMP3:%.*]]) +; CHECK: entry_to_outline: +; 
CHECK-NEXT: [[X:%.*]] = bitcast i32* [[TMP0]] to i8* +; CHECK-NEXT: [[AL:%.*]] = add i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[BL:%.*]] = add i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[CL:%.*]] = add i32 [[TMP2]], [[TMP3]] +; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +;