Index: llvm/include/llvm/Transforms/IPO/IROutliner.h =================================================================== --- llvm/include/llvm/Transforms/IPO/IROutliner.h +++ llvm/include/llvm/Transforms/IPO/IROutliner.h @@ -175,6 +175,54 @@ /// The memory allocator used to allocate the CodeExtractors. SpecificBumpPtrAllocator<CodeExtractor> CodeExtractorAllocator; + + /// Custom InstVisitor to classify different instructions for whether they can + /// be analyzed for similarity. This is needed as there may be instructions we + /// can identify as having similarity, but that are more complicated to outline. + struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> { + InstructionAllowed() {} + + // TODO: Determine a scheme to resolve when the label is similar enough. + bool visitBranchInst(BranchInst &BI) { return false; } + // TODO: Determine a scheme to resolve when the labels are similar enough. + bool visitPHINode(PHINode &PN) { return false; } + // TODO: Handle allocas. + bool visitAllocaInst(AllocaInst &AI) { return false; } + // VAArg instructions are not allowed since this could cause difficulty when + // differentiating between different sets of variable instructions in + // the deduplicated outlined regions. + bool visitVAArgInst(VAArgInst &VI) { return false; } + // We exclude all exception handling cases since they are so context + // dependent. + bool visitLandingPadInst(LandingPadInst &LPI) { return false; } + bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; } + // DebugInfo should be included in the regions, but should not be + // analyzed for similarity as it has no bearing on the outcome of the + // program. + bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; } + // TODO: Handle GetElementPtrInsts + bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { return false; } + // TODO: Handle specific intrinsics individually from those that can be + // handled. + bool visitIntrinsicInst(IntrinsicInst &II) { return false; } + // TODO: Handle CallInsts, there will need to be handling for special kinds + // of calls, as well as calls to intrinsics. + bool visitCallInst(CallInst &CI) { return false; } + // TODO: Handle FreezeInsts. Since a frozen value could be frozen inside + // the outlined region, and then returned as an output, this will have to be + // handled differently. + bool visitFreezeInst(FreezeInst &CI) { return false; } + // TODO: We do not currently handle similarity that changes the control flow. + bool visitInvokeInst(InvokeInst &II) { return false; } + // TODO: We do not currently handle similarity that changes the control flow. + bool visitCallBrInst(CallBrInst &CBI) { return false; } + // TODO: Handle interblock similarity. + bool visitTerminator(Instruction &I) { return false; } + bool visitInstruction(Instruction &I) { return true; } + }; + + /// An InstVisitor used to exclude certain instructions from being outlined. + InstructionAllowed InstructionClassifier; }; /// Pass to outline similar regions.
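The InstructionAllowed struct above relies on LLVM's CRTP-style InstVisitor: deriving from InstVisitor<SubClass, bool> makes visit(I) dispatch to the most specific visitXyzInst overload and fall back to visitInstruction when no overload matches. The following is a minimal, illustrative sketch of that dispatch pattern, not part of this patch; the ExampleAllowed and allInstructionsAllowed names are hypothetical.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

namespace {
// Hypothetical classifier: each visit method returns true if the instruction
// may be considered for outlining, mirroring how InstructionAllowed is used.
struct ExampleAllowed : public InstVisitor<ExampleAllowed, bool> {
  // Branches change control flow, so reject them.
  bool visitBranchInst(BranchInst &BI) { return false; }
  // Fallback for any instruction without a more specific overload.
  bool visitInstruction(Instruction &I) { return true; }
};
} // namespace

// Hypothetical helper: true when every instruction in the block is allowed.
static bool allInstructionsAllowed(BasicBlock &BB) {
  ExampleAllowed Classifier;
  return all_of(BB, [&Classifier](Instruction &I) { return Classifier.visit(I); });
}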
Index: llvm/lib/Transforms/IPO/IROutliner.cpp =================================================================== --- llvm/lib/Transforms/IPO/IROutliner.cpp +++ llvm/lib/Transforms/IPO/IROutliner.cpp @@ -176,8 +176,15 @@ if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx) continue; - OutlinableRegion *OS = new (RegionAllocator.Allocate()) - OutlinableRegion(IRSC, CurrentGroup); + bool BadInst = any_of(IRSC, [this](IRInstructionData &ID) { + return !this->InstructionClassifier.visit(ID.Inst); + }); + + if (BadInst) + continue; + + OutlinableRegion *OS = + new (RegionAllocator.Allocate()) OutlinableRegion(IRSC, CurrentGroup); CurrentGroup.Regions.push_back(OS); CurrentEndIdx = EndIdx; @@ -212,10 +219,14 @@ // should not be outlined in this round. So marking these as illegal is // allowed. IRInstructionDataList *IDL = Region.Candidate->front()->IDL; + Instruction *BeginRewritten = &*RewrittenBB->begin(); + Instruction *EndRewritten = &*RewrittenBB->begin(); Region.NewFront = new (IRDAAllocator.Allocate()) - IRInstructionData(*RewrittenBB->begin(), false, *IDL); + IRInstructionData(*RewrittenBB->begin(), + InstructionClassifier.visit(*BeginRewritten), *IDL); Region.NewBack = new (IRDAAllocator.Allocate()) - IRInstructionData(*std::prev(std::prev(RewrittenBB->end())), false, *IDL); + IRInstructionData(*std::prev(std::prev(RewrittenBB->end())), + InstructionClassifier.visit(*EndRewritten), *IDL); // Insert the first IRInstructionData of the new region in front of the // first IRInstructionData of the IRSimilarityCandidate. Index: llvm/test/Transforms/IROutliner/illegal-allocas.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-allocas.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; Show that we do not extract allocas, as outlining allocas may cause +; inconsistencies with the CodeExtractor's algorithm. + +define void @function1() { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + + ret void +} + +define void @function2() { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-assumes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test ensures that we do not include llvm.assumes. There are exceptions +; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now. 
+ +define void @outline_assumes() { +; CHECK-LABEL: @outline_assumes( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outline_assumes.outlined.5(i1* [[D]], i1* [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] +; CHECK-NEXT: call void @outline_assumes.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outline_assumes.outlined.1(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i1, align 4 + store i1 1, i1* %d, align 4 + %dl = load i1, i1* %d + %split_inst = sub i1 %dl, %dl + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + call void @llvm.assume(i1 %dl) + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_assumes2() { +; CHECK-LABEL: @outline_assumes2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outline_assumes2.outlined.6(i1* [[D]], i1* [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @outline_assumes2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outline_assumes2.outlined.2(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i1, align 4 + store i1 0, i1* %d, align 4 + %dl = load i1, i1* %d + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + call void @llvm.assume(i1 %dl) + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_assumes3() { +; CHECK-LABEL: @outline_assumes3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 +; CHECK-NEXT: store i1 true, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] +; CHECK-NEXT: call void @outline_assumes3.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) +; CHECK-NEXT: call void @outline_assumes3.outlined.3(i32* [[A]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + 
%d = alloca i1, align 4 + store i1 1, i1* %d, align 4 + %dl = load i1, i1* %d + %split_inst = add i1 %dl, %dl + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + call void @llvm.assume(i1 %dl) + %al = load i32, i32* %a + %bl = add i32 %al, %al + ret void +} + +define void @outline_assumes4() { +; CHECK-LABEL: @outline_assumes4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 +; CHECK-NEXT: store i1 false, i1* [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, i1* [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] +; CHECK-NEXT: call void @outline_assumes4.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) +; CHECK-NEXT: call void @outline_assumes4.outlined.4(i32* [[A]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + %d = alloca i1, align 4 + store i1 0, i1* %d, align 4 + %dl = load i1, i1* %d + %split_inst = add i1 %dl, %dl + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + call void @llvm.assume(i1 %dl) + %al = load i32, i32* %a + %bl = add i32 %al, %al + ret void +} + +declare void @llvm.assume(i1) Index: llvm/test/Transforms/IROutliner/illegal-branches.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-branches.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; Show that we do not extract sections with branches as it would require extra +; label and control flow checking. + +define void @function1() { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + br label %next +next: + ret void +} + +define void @function2() { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + br label %next +next: + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-callbr.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-callbr.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that we do not outline callbr instructions, since we do not +; outline any control flow change instructions.
+ + +define i32 @function1(i32 %a, i32 %b) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[A:%.*]], 4 +; CHECK-NEXT: call void @function1.outlined(i32 [[B:%.*]]) +; CHECK-NEXT: callbr void asm "xorl $0, $0 +; CHECK-NEXT: to label [[NORMAL:%.*]] [label %fail1] +; CHECK: normal: +; CHECK-NEXT: call void @function1.outlined.1(i32 [[B]]) +; CHECK-NEXT: ret i32 0 +; CHECK: fail1: +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B]], 1 +; CHECK-NEXT: ret i32 0 +; +bb0: + %0 = add i32 %a, 4 + %1 = add i32 %b, 1 + %2 = add i32 %b, 1 + callbr void asm "xorl $0, $0; jmp ${1:l}", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %0, i8* blockaddress(@function1, %fail1)) to label %normal [label %fail1] +normal: + %3 = add i32 %b, 1 + %4 = add i32 %b, 1 + ret i32 0 +fail1: + %5 = add i32 %b, 1 + %6 = add i32 %b, 1 + ret i32 0 +} + +define i32 @function2(i32 %a, i32 %b) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: bb0: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[A:%.*]], 4 +; CHECK-NEXT: call void @function2.outlined(i32 [[B:%.*]]) +; CHECK-NEXT: callbr void asm "xorl $0, $0 +; CHECK-NEXT: to label [[NORMAL:%.*]] [label %fail1] +; CHECK: normal: +; CHECK-NEXT: call void @function2.outlined.2(i32 [[B]]) +; CHECK-NEXT: ret i32 0 +; CHECK: fail1: +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[B]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[B]], 1 +; CHECK-NEXT: ret i32 0 +; +bb0: + %0 = add i32 %a, 4 + %1 = add i32 %b, 1 + %2 = add i32 %b, 1 + callbr void asm "xorl $0, $0; jmp ${1:l}", "r,X,~{dirflag},~{fpsr},~{flags}"(i32 %0, i8* blockaddress(@function2, %fail1)) to label %normal [label %fail1] +normal: + %3 = add i32 %b, 1 + %4 = add i32 %b, 1 + ret i32 0 +fail1: + %5 = add i32 %b, 1 + %6 = add i32 %b, 1 + ret i32 0 +} Index: llvm/test/Transforms/IROutliner/illegal-calls.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-calls.ll @@ -0,0 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that we do not outline calls. Special calls, such as +; indirect or nameless calls require extra handling to ensure that there +; are no inconsistencies when outlining and consolidating regions. 
+ +declare void @f1(i32*, i32*); + +define void @outline_constants1() { +; CHECK-LABEL: @outline_constants1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outline_constants1.outlined.1(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: call void @f1(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: call void @outline_constants1.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + call void @f1(i32* %a, i32* %b) + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} + +define void @outline_constants2() { +; CHECK-LABEL: @outline_constants2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @outline_constants2.outlined.2(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: call void @f1(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: call void @outline_constants2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]) +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + %c = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + store i32 4, i32* %c, align 4 + call void @f1(i32* %a, i32* %b) + %al = load i32, i32* %a + %bl = load i32, i32* %b + %cl = load i32, i32* %c + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-catchpad.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-catchpad.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that catchpad instructions are not outlined even if they are +; in a similar section. Dealing with exception handling inside of an outlined +; function would require a lot of handling that is not implemented yet.
+ +declare void @llvm.donothing() nounwind readnone + +define void @function1() personality i8 3 { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catchpad1] unwind to caller +; CHECK: catchpad1: +; CHECK-NEXT: [[TMP0:%.*]] = catchpad within [[CS1]] [] +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller +catchpad1: + catchpad within %cs1 [] + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %normal +normal: + ret void +} + +define void @function2() personality i8 3 { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CS1:%.*]] = catchswitch within none [label %catchpad1] unwind to caller +; CHECK: catchpad1: +; CHECK-NEXT: [[TMP0:%.*]] = catchpad within [[CS1]] [] +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cs1 = catchswitch within none [label %catchpad1] unwind to caller +catchpad1: + catchpad within %cs1 [] + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %normal +normal: + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-cleanup.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-cleanup.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that cleanuppad instructions are not outlined even if they are +; in a similar section. Dealing with exception handling inside of an outlined +; function would require a lot of handling that is not implemented yet.
+ +declare void @llvm.donothing() nounwind readnone + +define void @function1() personality i8 3 { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEAN:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %clean = cleanuppad within none [] + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %normal +normal: + ret void +} + +define void @function2() personality i8 3 { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEAN:%.*]] = cleanuppad within none [] +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %clean = cleanuppad within none [] + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %normal +normal: + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-frozen.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-frozen.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; Show that we do not extract freeze instructions, since extra handling is +; required to mark any outputs used with freeze. 
+ +define void @function1(i32* %a, i32* %b) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: [[C:%.*]] = freeze i32* [[A:%.*]] +; CHECK-NEXT: call void @function1.outlined(i32* [[C]], i32* [[B:%.*]]) +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: br label [[FIRST]] +; +entry: + br label %first +first: + %c = freeze i32* %a + store i32 2, i32* %c, align 4 + store i32 3, i32* %b, align 4 + ret void +next: + br label %first +} + +define void @function2(i32* %a, i32* %b) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: [[C:%.*]] = freeze i32* [[A:%.*]] +; CHECK-NEXT: call void @function2.outlined(i32* [[C]], i32* [[B:%.*]]) +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: br label [[FIRST]] +; +entry: + br label %first +first: + %c = freeze i32* %a + store i32 2, i32* %c, align 4 + store i32 3, i32* %b, align 4 + ret void +next: + br label %first +} Index: llvm/test/Transforms/IROutliner/illegal-gep.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-gep.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks to make sure that we do not outline getelementptr +; instructions since we must make extra checks on the final operands. + +%struct.RT = type { i8, [10 x [20 x i32]], i8 } +%struct.ST = type { i32, double, %struct.RT } + +define void @function1(%struct.ST* %s, i64 %t) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.ST* [[S:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.ST* [[S]], i64 [[T:%.*]] +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + %0 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1 + %1 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 %t + ret void +} + +define void @function2(%struct.ST* %s, i64 %t) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], %struct.ST* [[S:%.*]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ST]], %struct.ST* [[S]], i64 [[T:%.*]] +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + %0 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 1 + %1 = getelementptr inbounds %struct.ST, %struct.ST* %s, i64 %t + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-invoke.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-invoke.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that invoke instructions are not outlined even if they are +; in a similar
section. Outlining does not currently handle control flow +; changes. + +declare void @llvm.donothing() nounwind readnone + +define void @function1() personality i8 3 { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} + +define void @function2() personality i8 3 { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i8 cleanup + br label %normal +normal: + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-landingpad.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-landingpad.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that landingpad instructions are not outlined even if they are +; in a similar section. Dealing with exception handling inside of an outlined +; function would require a lot of handling that is not implemented yet.
+ +declare void @llvm.donothing() nounwind readnone + +define void @function1() personality i8 3 { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i8 cleanup + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %normal +normal: + ret void +} + +define void @function2() personality i8 3 { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK-NEXT: invoke void @llvm.donothing() +; CHECK-NEXT: to label [[NORMAL:%.*]] unwind label [[EXCEPTION:%.*]] +; CHECK: exception: +; CHECK-NEXT: [[CLEANUP:%.*]] = landingpad i8 +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]]) +; CHECK-NEXT: br label [[NORMAL]] +; CHECK: normal: +; CHECK-NEXT: ret void +; +entry: + %a = alloca i32, align 4 + %b = alloca i32, align 4 + invoke void @llvm.donothing() to label %normal unwind label %exception +exception: + %cleanup = landingpad i8 cleanup + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + br label %normal +normal: + ret void +} Index: llvm/test/Transforms/IROutliner/illegal-memcpy.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-memcpy.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that we do not outline memcpy intrinsics since it may require +; extra address space checks. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} Index: llvm/test/Transforms/IROutliner/illegal-memmove.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-memmove.ll @@ -0,0 +1,65 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that we do not outline memmove intrinsics since it may require +; extra address space checks.
+ +declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) + +define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @function1.outlined.1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @function1.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} + +define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[A_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1 +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @function2.outlined.2(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1 +; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]]) +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: call void @function2.outlined(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]]) +; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]]) +; CHECK-NEXT: ret i8 [[RET_RELOAD]] +; +entry: + %a = load i8, i8* %s + %b = load i8, i8* %d + call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false) + %c = add i8 %a, %b + %ret = load i8, i8* %s + ret i8 %ret +} Index: llvm/test/Transforms/IROutliner/illegal-memset.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-memset.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that we do not outline memset intrinsics since it requires +; extra address space checks. 
+ +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) + +define i64 @function1(i64 %x, i64 %z, i64 %n) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: [[TMP:%.*]] = bitcast [59 x i64]* [[POOL]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false) +; CHECK-NEXT: call void @function1.outlined(i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} + +define i64 @function2(i64 %x, i64 %z, i64 %n) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[POOL:%.*]] = alloca [59 x i64], align 4 +; CHECK-NEXT: [[TMP:%.*]] = bitcast [59 x i64]* [[POOL]] to i8* +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false) +; CHECK-NEXT: call void @function2.outlined(i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]]) +; CHECK-NEXT: ret i64 0 +; +entry: + %pool = alloca [59 x i64], align 4 + %tmp = bitcast [59 x i64]* %pool to i8* + call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false) + %cmp3 = icmp eq i64 %n, 0 + %a = add i64 %x, %z + %c = add i64 %x, %z + ret i64 0 +} Index: llvm/test/Transforms/IROutliner/illegal-phi-nodes.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-phi-nodes.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; Show that we do not extract phi nodes as it would require extra label and +; control flow checking. + +define void @function1(i32* %a, i32* %b) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 3, [[NEXT:%.*]] ] +; CHECK-NEXT: call void @function1.outlined(i32* [[A:%.*]], i32* [[B:%.*]]) +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: br label [[FIRST]] +; +entry: + br label %first +first: + %0 = phi i32 [ 0, %entry ], [ 3, %next ] + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + ret void +next: + br label %first +} + +define void @function2(i32* %a, i32* %b) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FIRST:%.*]] +; CHECK: first: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 3, [[NEXT:%.*]] ] +; CHECK-NEXT: call void @function2.outlined(i32* [[A:%.*]], i32* [[B:%.*]]) +; CHECK-NEXT: ret void +; CHECK: next: +; CHECK-NEXT: br label [[FIRST]] +; +entry: + br label %first +first: + %0 = phi i32 [ 0, %entry ], [ 3, %next ] + store i32 2, i32* %a, align 4 + store i32 3, i32* %b, align 4 + ret void +next: + br label %first +} Index: llvm/test/Transforms/IROutliner/illegal-vaarg.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/illegal-vaarg.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test ensures that we do not outline vararg instructions or intrinsics, as +; they may cause inconsistencies when outlining. 
+ +declare void @llvm.va_start(i8*) +declare void @llvm.va_copy(i8*, i8*) +declare void @llvm.va_end(i8*) + +define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind { +; CHECK-LABEL: @func1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @func1.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) +; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} + +define i32 @func2(i32 %a, double %b, i8* %v, ...) 
nounwind { +; CHECK-LABEL: @func2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AP1_LOC:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 +; CHECK-NEXT: [[AP:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @func2.outlined(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]]) +; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]]) +; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: [[TMP0:%.*]] = va_arg i8** [[AP]], i32 +; CHECK-NEXT: call void @llvm.va_copy(i8* [[V:%.*]], i8* [[AP1_RELOAD]]) +; CHECK-NEXT: call void @llvm.va_end(i8* [[AP1_RELOAD]]) +; CHECK-NEXT: store i32 [[TMP0]], i32* [[C]], align 4 +; CHECK-NEXT: [[AP2:%.*]] = bitcast i8** [[AP]] to i8* +; CHECK-NEXT: [[TMP:%.*]] = load i32, i32* [[C]], align 4 +; CHECK-NEXT: ret i32 [[TMP]] +; +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + call void @llvm.va_copy(i8* %v, i8* %ap1) + call void @llvm.va_end(i8* %ap1) + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + %tmp = load i32, i32* %c, align 4 + ret i32 %tmp +} Index: llvm/test/Transforms/IROutliner/legal-debug.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/IROutliner/legal-debug.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -verify -iroutliner < %s | FileCheck %s + +; This test checks that debug info is extracted along with the other +; instructions.
+ +define void @function1() !dbg !6 { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4, [[DBG17:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[A]], [[META9:metadata !.*]], metadata !DIExpression()), [[DBG17]] +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4, [[DBG18:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[B]], [[META11:metadata !.*]], metadata !DIExpression()), [[DBG18]] +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4, [[DBG19:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[C]], [[META12:metadata !.*]], metadata !DIExpression()), [[DBG19]] +; CHECK-NEXT: call void @function1.outlined(i32* [[A]], i32* [[B]], i32* [[C]]), [[DBG20:!dbg !.*]] +; CHECK-NEXT: ret void, [[DBG21:!dbg !.*]] +; +entry: + %a = alloca i32, align 4, !dbg !17 + call void @llvm.dbg.value(metadata i32* %a, metadata !9, metadata !DIExpression()), !dbg !17 + %b = alloca i32, align 4, !dbg !18 + call void @llvm.dbg.value(metadata i32* %b, metadata !11, metadata !DIExpression()), !dbg !18 + %c = alloca i32, align 4, !dbg !19 + call void @llvm.dbg.value(metadata i32* %c, metadata !12, metadata !DIExpression()), !dbg !19 + store i32 2, i32* %a, align 4, !dbg !20 + store i32 3, i32* %b, align 4, !dbg !21 + store i32 4, i32* %c, align 4, !dbg !22 + %al = load i32, i32* %a, align 4, !dbg !23 + call void @llvm.dbg.value(metadata i32 %al, metadata !13, metadata !DIExpression()), !dbg !23 + %bl = load i32, i32* %b, align 4, !dbg !24 + call void @llvm.dbg.value(metadata i32 %bl, metadata !15, metadata !DIExpression()), !dbg !24 + %cl = load i32, i32* %c, align 4, !dbg !25 + call void @llvm.dbg.value(metadata i32 %cl, metadata !16, metadata !DIExpression()), !dbg !25 + ret void, !dbg !26 +} + +define void @function2() !dbg !27 { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4, [[DBG30:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[A]], [[META24:metadata !.*]], metadata !DIExpression()), [[DBG30]] +; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4, [[DBG31:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[B]], [[META25:metadata !.*]], metadata !DIExpression()), [[DBG31]] +; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4, [[DBG32:!dbg !.*]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32* [[C]], [[META26:metadata !.*]], metadata !DIExpression()), [[DBG32]] +; CHECK-NEXT: call void @function2.outlined(i32* [[A]], i32* [[B]], i32* [[C]]), [[DBG33:!dbg !.*]] +; CHECK-NEXT: ret void, [[DBG34:!dbg !.*]] +; +entry: + %a = alloca i32, align 4, !dbg !35 + call void @llvm.dbg.value(metadata i32* %a, metadata !29, metadata !DIExpression()), !dbg !35 + %b = alloca i32, align 4, !dbg !36 + call void @llvm.dbg.value(metadata i32* %b, metadata !30, metadata !DIExpression()), !dbg !36 + %c = alloca i32, align 4, !dbg !37 + call void @llvm.dbg.value(metadata i32* %c, metadata !31, metadata !DIExpression()), !dbg !37 + store i32 2, i32* %a, align 4, !dbg !38 + store i32 3, i32* %b, align 4, !dbg !39 + store i32 4, i32* %c, align 4, !dbg !40 + %al = load i32, i32* %a, align 4, !dbg !41 + call void @llvm.dbg.value(metadata i32 %al, metadata !32, metadata !DIExpression()), !dbg !41 + %bl = load i32, i32* %b, align 4, !dbg !42 + call void @llvm.dbg.value(metadata i32 %bl, metadata !33, metadata !DIExpression()), !dbg !42 + %cl = load i32, i32* %c, align 4, !dbg !43 + call void @llvm.dbg.value(metadata i32 %cl, metadata !34, metadata !DIExpression()), !dbg !43 + 
ret void, !dbg !44 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 + +attributes #0 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.debugify = !{!3, !4} +!llvm.module.flags = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "legal-debug.ll", directory: "/") +!2 = !{} +!3 = !{i32 20} +!4 = !{i32 12} +!5 = !{i32 2, !"Debug Info Version", i32 3} +!6 = distinct !DISubprogram(name: "function1", linkageName: "function1", scope: null, file: !1, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8) +!7 = !DISubroutineType(types: !2) +!8 = !{!9, !11, !12, !13, !15, !16} +!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10) +!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned) +!11 = !DILocalVariable(name: "2", scope: !6, file: !1, line: 2, type: !10) +!12 = !DILocalVariable(name: "3", scope: !6, file: !1, line: 3, type: !10) +!13 = !DILocalVariable(name: "4", scope: !6, file: !1, line: 7, type: !14) +!14 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned) +!15 = !DILocalVariable(name: "5", scope: !6, file: !1, line: 8, type: !14) +!16 = !DILocalVariable(name: "6", scope: !6, file: !1, line: 9, type: !14) +!17 = !DILocation(line: 1, column: 1, scope: !6) +!18 = !DILocation(line: 2, column: 1, scope: !6) +!19 = !DILocation(line: 3, column: 1, scope: !6) +!20 = !DILocation(line: 4, column: 1, scope: !6) +!21 = !DILocation(line: 5, column: 1, scope: !6) +!22 = !DILocation(line: 6, column: 1, scope: !6) +!23 = !DILocation(line: 7, column: 1, scope: !6) +!24 = !DILocation(line: 8, column: 1, scope: !6) +!25 = !DILocation(line: 9, column: 1, scope: !6) +!26 = !DILocation(line: 10, column: 1, scope: !6) +!27 = distinct !DISubprogram(name: "function2", linkageName: "function2", scope: null, file: !1, line: 11, type: !7, scopeLine: 11, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !28) +!28 = !{!29, !30, !31, !32, !33, !34} +!29 = !DILocalVariable(name: "7", scope: !27, file: !1, line: 11, type: !10) +!30 = !DILocalVariable(name: "8", scope: !27, file: !1, line: 12, type: !10) +!31 = !DILocalVariable(name: "9", scope: !27, file: !1, line: 13, type: !10) +!32 = !DILocalVariable(name: "10", scope: !27, file: !1, line: 17, type: !14) +!33 = !DILocalVariable(name: "11", scope: !27, file: !1, line: 18, type: !14) +!34 = !DILocalVariable(name: "12", scope: !27, file: !1, line: 19, type: !14) +!35 = !DILocation(line: 11, column: 1, scope: !27) +!36 = !DILocation(line: 12, column: 1, scope: !27) +!37 = !DILocation(line: 13, column: 1, scope: !27) +!38 = !DILocation(line: 14, column: 1, scope: !27) +!39 = !DILocation(line: 15, column: 1, scope: !27) +!40 = !DILocation(line: 16, column: 1, scope: !27) +!41 = !DILocation(line: 17, column: 1, scope: !27) +!42 = !DILocation(line: 18, column: 1, scope: !27) +!43 = !DILocation(line: 19, column: 1, scope: !27) +!44 = !DILocation(line: 20, column: 1, scope: !27)