Index: llvm/lib/CodeGen/MachineOutliner.cpp =================================================================== --- llvm/lib/CodeGen/MachineOutliner.cpp +++ llvm/lib/CodeGen/MachineOutliner.cpp @@ -96,6 +96,12 @@ cl::desc("Enable the machine outliner on linkonceodr functions"), cl::init(false)); +// Set the number of times to repeatedly apply outlining. +// Defaults to 1, but more repetitions can save additional size. +static cl::opt<unsigned> + NumRepeat("outline-repeat-count", cl::Hidden, + cl::desc("The number of times to apply outlining"), cl::init(1)); + namespace { /// Represents an undefined index in the suffix tree. @@ -841,6 +847,9 @@ /// linkonceodr linkage. bool OutlineFromLinkOnceODRs = false; + /// The zero-based index of the current outlining round. + unsigned OutlineRepeatedNum = 0; + /// Set to true if the outliner should run on all functions in the module /// considered safe for outlining. /// Set to true by default for compatibility with llc's -run-pass option. @@ -899,9 +908,12 @@ InstructionMapper &Mapper, unsigned Name); - /// Calls 'doOutline()'. + /// Calls 'runOnceOnModule()' up to NumRepeat times. bool runOnModule(Module &M) override; + /// Calls 'doOutline()'. + bool runOnceOnModule(Module &M, unsigned Iter); + /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. bool doOutline(Module &M, unsigned &OutlinedFunctionNum); @@ -1098,7 +1110,13 @@ // Create the function name. This should be unique. // FIXME: We should have a better naming scheme. This should be stable, // regardless of changes to the outliner's cost model/traversal order. - std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str(); + std::string FunctionName; + if (OutlineRepeatedNum > 0) + FunctionName = ("OUTLINED_FUNCTION_" + Twine(OutlineRepeatedNum + 1) + "_" + + Twine(Name)) + .str(); + else + FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str(); // Create the function using an IR-level function.
LLVMContext &C = M.getContext(); @@ -1415,12 +1433,14 @@ } } -bool MachineOutliner::runOnModule(Module &M) { +bool MachineOutliner::runOnceOnModule(Module &M, unsigned Iter) { // Check if there's anything in the module. If it's empty, then there's // nothing to outline. if (M.empty()) return false; + OutlineRepeatedNum = Iter; + // Number to append to the current outlined function. unsigned OutlinedFunctionNum = 0; @@ -1484,3 +1504,18 @@ return OutlinedSomething; } + +// Apply outlining up to NumRepeat times, stopping once a round returns false. +bool MachineOutliner::runOnModule(Module &M) { + unsigned NumRepeats = NumRepeat; + if (NumRepeats < 1) + NumRepeats = 1; + + bool Changed = false; + for (unsigned I = 0; I < NumRepeats; I++) { + if (!runOnceOnModule(M, I)) + return Changed; + Changed = true; + } + return Changed; +} Index: llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir @@ -0,0 +1,271 @@ +# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -outline-repeat-count=2 -verify-machineinstrs %s -o - | FileCheck %s + +# Example of Repeated Instruction Sequence - Iterative Machine Outlining +# +#; define void @"$s12"(...) { define i64 @"$s5" (...) { define void @"$s13"(...) { +# ... ... ... +# %8 = load i1, i1* %7 %8 = load i1, i1* %7 +# %9 = load i4, i4*, %6 %9 = load i4, i4*, %6 %9 = load i4, i4*, %6 +# store i4 %9, i4* %5 store i4 %9, i4* %5 store i4 %9, i4* %5 +# ... ... ... +# } } } +# +# After machine outliner (1st time) +# +# define void @"$s12"(...) { define i64 @"$s5" (...) { define void @"$s13"(...) { +# ... ... ... +# %8 = load i1, i1* %7 %8 = load i1, i1* %7 +# call void @outlined_function_1_1 call void @outlined_function_1_1 call void @outlined_function_1_1 +# ... ... ... +# } } } +# +# After machine outliner (2nd time) +# +# define void @"$s12"(...) { define i64 @"$s5" (...) { define void @"$s13"(...)
{ +# ... ... ... +# call void @outlined_function_2_1 call void @outlined_function_1_1 call void @outlined_function_2_1 +# ... ... ... +# } } } +# +# Check that the machine outliner can find further outlining opportunities after a first +# round of outlining has been performed. +# +--- | + target triple = "aarch64-apple-darwin" + + %0 = type { %1*, i64 } + %1 = type { i64 } + %2 = type <{ %0, %3 }> + %3 = type <{ %4 }> + %4 = type <{ %5 }> + %5 = type <{ %6* }> + %6 = type opaque + %7 = type <{ %0, %8, %3 }> + %8 = type <{ %9, %9, %9, %9 }> + %9 = type <{ %10 }> + %10 = type <{ double }> + declare %0* @widget(%0* returned) local_unnamed_addr + declare void @foo(i8*, [24 x i8]*, i64, i8*) local_unnamed_addr + declare hidden swiftcc %2* @bar() + define void @baz.14() { + bb: + %tmp3 = alloca [24 x i8], align 8 + %tmp37 = call swiftcc %7* @barney.16() + %tmp57 = getelementptr inbounds %7, %7* %tmp37, i64 0, i32 2 + %tmp59 = bitcast %3* %tmp57 to i8* + call void @foo(i8* nonnull %tmp59, [24 x i8]* nonnull %tmp3, i64 33, i8* null) + %tmp602 = bitcast %7* %tmp37 to %0* + %tmp61 = call %0* @widget(%0* returned %tmp602) + ret void + } + define void @baz.15() { + bb: + %tmp4 = alloca [24 x i8], align 8 + %tmp11 = tail call swiftcc %2* @bar() + %tmp38 = call swiftcc %7* @barney.16() + %tmp101 = getelementptr inbounds %2, %2* %tmp11, i64 0, i32 1 + %tmp103 = bitcast %3* %tmp101 to i8* + call void @foo(i8* nonnull %tmp103, [24 x i8]* nonnull %tmp4, i64 33, i8* null) + %tmp1042 = bitcast %7* %tmp38 to %0* + %tmp105 = call %0* @widget(%0* returned %tmp1042) + ret void + } + define void @baz.16() { + bb: + %tmp6 = alloca [24 x i8], align 8 + %tmp39 = call swiftcc %7* @barney.16() + %tmp178 = getelementptr inbounds %7, %7* %tmp39, i64 0, i32 2 + %tmp180 = bitcast %3* %tmp178 to i8* + call void @foo(i8* nonnull %tmp180, [24 x i8]* nonnull %tmp6, i64 33, i8* null) + %tmp1812 = bitcast %7* %tmp39 to %0* + %tmp182 = call %0* @widget(%0* returned %tmp1812) + ret void + } + declare hidden swiftcc %7*
@barney.16() local_unnamed_addr + +... +--- +name: baz.14 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 24 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: tmp3, type: default, offset: 0, size: 24, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -24, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.bb: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @barney.16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x19 = COPY $x0 + renamable $x0 = nuw ADDXri $x0, 48, 0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x1 = ADDXri %stack.0.tmp3, 0, 0 + dead $w2 = MOVi32imm 33, implicit-def $x2 + $x3 = COPY $xzr + BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY killed renamable $x19 + BL @widget, csr_aarch64_aapcs_thisreturn, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + RET_ReallyLR + +... 
+--- +name: baz.15 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 24 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: tmp4, type: default, offset: 0, size: 24, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -24, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.bb: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @bar, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x19 = COPY $x0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @barney.16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x20 = COPY $x0 + renamable $x0 = nuw ADDXri killed renamable $x19, 16, 0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x1 = ADDXri %stack.0.tmp4, 0, 0 + dead $w2 = MOVi32imm 33, implicit-def $x2 + $x3 = COPY $xzr + BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = 
COPY killed renamable $x20 + BL @widget, csr_aarch64_aapcs_thisreturn, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + RET_ReallyLR + +... +--- +name: baz.16 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 8 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 24 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: tmp6, type: default, offset: 0, size: 24, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -24, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.bb: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @barney.16, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + renamable $x19 = COPY $x0 + renamable $x0 = nuw ADDXri $x0, 48, 0 + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x1 = ADDXri %stack.0.tmp6, 0, 0 + dead $w2 = MOVi32imm 33, implicit-def $x2 + $x3 = COPY $xzr + BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + $x0 = COPY killed renamable $x19 + BL @widget, 
csr_aarch64_aapcs_thisreturn, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + RET_ReallyLR + +... +# CHECK: [[OUTLINED:OUTLINED_FUNCTION_2_[0-9]+]]