diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -510,13 +510,16 @@ // outlined region. PremappedInputs are the arguments found by the // CodeExtractor, removing conditions such as sunken allocas, but that // may need to be remapped due to the extracted output values replacing - // the original values. - SetVector OverallInputs, PremappedInputs, SinkCands, HoistCands; + // the original values. We use DummyOutputs for this first run of finding + // inputs and outputs since the outputs could change during findAllocas; + // the correct set of extracted outputs will be in the final Outputs ValueSet. + SetVector OverallInputs, PremappedInputs, SinkCands, HoistCands, + DummyOutputs; // Use the code extractor to get the inputs and outputs, without sunken // allocas or removing llvm.assumes. CodeExtractor *CE = Region.CE; - CE->findInputsOutputs(OverallInputs, Outputs, SinkCands); + CE->findInputsOutputs(OverallInputs, DummyOutputs, SinkCands); assert(Region.StartBB && "Region must have a start BasicBlock!"); Function *OrigF = Region.StartBB->getParent(); CodeExtractorAnalysisCache CEAC(*OrigF); @@ -1263,6 +1266,16 @@ continue; bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) { + // We check if there is a discrepancy between the InstructionDataList + // and the actual next instruction in the module. If there is, it means + // that an extra instruction was added, likely by the CodeExtractor. + + // Since we do not have any similarity data about this particular + // instruction, we cannot confidently outline it, and must discard this + // candidate. 
+ if (std::next(ID.getIterator())->Inst != + ID.Inst->getNextNonDebugInstruction()) + return true; return !this->InstructionClassifier.visit(ID.Inst); }); diff --git a/llvm/test/Transforms/IROutliner/outlining-bitcasts.ll b/llvm/test/Transforms/IROutliner/outlining-bitcasts.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-bitcasts.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s + +; This test ensures that an extra output is not added when there is a bitcast +; that is relocated to outside of the extraction due to a starting lifetime +; instruction outside of the extracted region. + +; Additionally, we check that the newly added bitcast instruction is excluded in +; further extractions. + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) +declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) + +define void @outline_bitcast_base() { +; CHECK-LABEL: @outline_bitcast_base( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %X = bitcast i32* %d to i8* + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_bitcast_removed() { +; CHECK-LABEL: @outline_bitcast_removed( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = 
alloca i32, align 4 +; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]], i32* [[D]]) +; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]]) +; CHECK-NEXT: [[AM:%.*]] = load i32, i32* [[B]], align 4 +; CHECK-NEXT: [[BM:%.*]] = load i32, i32* [[A]], align 4 +; CHECK-NEXT: [[CM:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: [[AS:%.*]] = add i32 [[AM]], [[BM]] +; CHECK-NEXT: [[BS:%.*]] = add i32 [[BM]], [[AM]] +; CHECK-NEXT: [[CS:%.*]] = add i32 [[BM]], [[CM]] +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[D]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + %X = bitcast i32* %d to i8* + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + call void @llvm.lifetime.start.p0i8(i64 -1, i8* %X) + %am = load i32, i32* %b + %bm = load i32, i32* %a + %cm = load i32, i32* %c + %as = add i32 %am, %bm + %bs = add i32 %bm, %am + %cs = add i32 %bm, %cm + call void @llvm.lifetime.end.p0i8(i64 -1, i8* %X) + ret void +} + +define void @outline_bitcast_base2(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: @outline_bitcast_base2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AL:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[BL:%.*]] = add i32 [[B]], [[A]] +; CHECK-NEXT: [[CL:%.*]] = add i32 [[B]], [[C:%.*]] +; CHECK-NEXT: ret void +; +entry: + %al = add i32 %a, %b + %bl = add i32 %b, %a + %cl = add i32 %b, %c + ret void +} + +; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]]) +; CHECK: entry_to_outline: +; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4 +; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4 +; 
CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4 +; CHECK-NEXT: [[X:%.*]] = bitcast i32* [[ARG3]] to i8* +; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4 +; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4 +; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4