diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -431,6 +431,7 @@ void addPostRegAlloc() override; void addPreSched2() override; void addPreEmitPass() override; + void addPreEmitPass2() override; std::unique_ptr getCSEConfig() const override; }; @@ -692,8 +693,11 @@ if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && TM->getTargetTriple().isOSBinFormatMachO()) addPass(createAArch64CollectLOHPass()); +} - // SVE bundles move prefixes with destructive operations. +void AArch64PassConfig::addPreEmitPass2() { + // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo + // instructions are lowered to bundles as well. addPass(createUnpackMachineBundles(nullptr)); } diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -64,10 +64,10 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass -; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis +; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: AArch64 Assembly Printer diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -195,12 +195,12 @@ ; CHECK-NEXT: AArch64 Branch Targets ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: AArch64 Compress Jump Tables -; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Contiguously Lay 
Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager +; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: AArch64 Assembly Printer diff --git a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll --- a/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll +++ b/llvm/test/CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll @@ -34,6 +34,10 @@ ; HOTNESS-NEXT: Executing Pass 'Function Pass Manager' ; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Executing Pass 'Unpack machine instruction bundles' +; HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles' +; HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis' ; HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter' ; HOTNESS-NEXT: Building MachineBlockFrequencyInfo on the fly @@ -51,6 +55,10 @@ ; NO_HOTNESS-NEXT: Executing Pass 'Function Pass Manager' ; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Executing Pass 'Unpack machine instruction bundles' +; NO_HOTNESS-NEXT: Freeing Pass 'Unpack machine instruction bundles' +; NO_HOTNESS-NEXT: Executing Pass 'Verify generated machine code' +; NO_HOTNESS-NEXT: Freeing Pass 'Verify generated machine code' ; NO_HOTNESS-NEXT: Executing Pass 'Lazy Machine Block Frequency Analysis' ; NO_HOTNESS-NEXT: Executing Pass 'Machine Optimization Remark Emitter' ; NO_HOTNESS-NEXT: Executing Pass 'AArch64 Assembly Printer' diff --git 
a/llvm/test/CodeGen/AArch64/rvmarker-pseudo-expansion-and-outlining.mir b/llvm/test/CodeGen/AArch64/rvmarker-pseudo-expansion-and-outlining.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/rvmarker-pseudo-expansion-and-outlining.mir @@ -0,0 +1,132 @@ +# RUN: llc -run-pass=aarch64-expand-pseudo -run-pass=prologepilog -run-pass=machine-outliner -mtriple=arm64-apple-ios -o - %s | FileCheck %s + +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-ios9.0.0" + + define void @fn1() { + ret void + } + + define void @fn2() { + ret void + } + + declare void @cb1() + declare void @cb2() + declare void @fn() + +... +--- +# BLR_RVMARKER calls to the same function. Make sure the marker instruction is not outlined on its own. +# +# CHECK-LABEL: name: fn1 +# CHECK: bb.1: +# CHECK: BUNDLE implicit-def +# CHECK-NEXT: BL @cb1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 +# CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 +# CHECK-NEXT: } +# CHECK-NEXT: BL @OUTLINED_FUNCTION_0, implicit-def $lr, implicit $sp, implicit-def $w12, implicit-def $lr, implicit $wzr, implicit $sp +# +# CHECK: bb.2: +# CHECK: BUNDLE implicit-def +# CHECK-NEXT: BL @cb1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 +# CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 +# CHECK-NEXT: } +# CHECK-NEXT: BL @OUTLINED_FUNCTION_0, implicit-def $lr, implicit $sp, implicit-def $w12, implicit-def $lr, implicit $wzr, implicit $sp + + +name: fn1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x9 = ADRP target-flags(aarch64-page) @fn + $x9 = ORRXri $xzr, 1 + $w12 = ORRWri $wzr, 1 + $w30 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr + $x20, $x19 = LDPXi $sp, 10 + BLR_RVMARKER @cb1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, 
implicit-def $x0 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 + bb.2: + liveins: $lr + $x20, $x19 = LDPXi $sp, 10 + BLR_RVMARKER @cb1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 + bb.3: + liveins: $lr + RET undef $lr + +... + +--- +# BLR_RVMARKER calls to different functions. Make sure the marker instruction is not outlined on its own. +# +# CHECK-LABEL: name: fn2 +# CHECK: bb.1: +# CHECK: BUNDLE implicit-def +# CHECK-NEXT: BL @cb1, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 +# CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 +# CHECK-NEXT: } +# CHECK-NEXT: BL @OUTLINED_FUNCTION_0, implicit-def $lr, implicit $sp, implicit-def $w12, implicit-def $lr, implicit $wzr, implicit $sp +# CHECK: bb.2: +# CHECK: BUNDLE implicit-def +# CHECK-NEXT: BL @cb2, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 +# CHECK-NEXT: $fp = ORRXrs $xzr, $fp, 0 +# CHECK-NEXT: } +# CHECK-NEXT: BL @OUTLINED_FUNCTION_0, implicit-def $lr, implicit $sp, implicit-def $w12, implicit-def $lr, implicit $wzr, implicit $sp +# +name: fn2 +tracksRegLiveness: true +body: | + bb.0: + liveins: $lr + $sp = frame-setup SUBXri $sp, 16, 0 + renamable $x9 = ADRP target-flags(aarch64-page) @fn + $x9 = ORRXri $xzr, 1 + $w12 = ORRWri $wzr, 1 + $w30 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 + bb.1: + liveins: $lr + $x20, $x19 = LDPXi $sp, 10 + BLR_RVMARKER @cb1, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + 
$w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 + bb.2: + liveins: $lr + $x20, $x19 = LDPXi $sp, 10 + BLR_RVMARKER @cb2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $w12 = ORRWri $wzr, 1 + $lr = ORRXri $xzr, 1 + bb.3: + liveins: $lr + RET undef $lr + +...