diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1937,6 +1937,13 @@
     return Formatter.get();
   }
 
+  /// Returns the target-specific default value for tail duplication.
+  /// This value will be used if the tail-dup-size argument is not provided.
+  virtual unsigned
+  getTailDuplicateSizeOverride(CodeGenOpt::Level OptLevel) const {
+    return OptLevel >= CodeGenOpt::Aggressive ? 4 : 2;
+  }
+
 private:
   mutable std::unique_ptr<MIRFormatter> Formatter;
   unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3337,6 +3337,13 @@
       TailDupSize = TailDupPlacementAggressiveThreshold;
   }
 
+  // If there's no threshold provided through options, query the target
+  // information for a threshold instead.
+  if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
+      (PassConfig->getOptLevel() < CodeGenOpt::Aggressive ||
+       TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
+    TailDupSize = TII->getTailDuplicateSizeOverride(PassConfig->getOptLevel());
+
   if (allowTailDupPlacement()) {
     MPDT = &getAnalysis<MachinePostDominatorTree>();
     bool OptForSize = MF.getFunction().hasOptSize() ||
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -299,6 +299,9 @@
   Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
                                                  Register Reg) const override;
 
+  unsigned int
+  getTailDuplicateSizeOverride(CodeGenOpt::Level OptLevel) const override;
+
   static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                   int64_t &NumBytes,
                                                   int64_t &NumPredicateVectors,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7183,6 +7183,11 @@
   return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
 }
 
+unsigned int AArch64InstrInfo::getTailDuplicateSizeOverride(
+    CodeGenOpt::Level OptLevel) const {
+  return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
+}
+
 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
     return AArch64::BLRNoIP;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llc -mtriple=aarch64-none-linux -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
+
+%struct.record = type { %struct.record*, i32, %union.anon }
+%union.anon = type { %struct.anon }
+%struct.anon = type { i32, i32, [31 x i8] }
+
+@Ptr_Glob = dso_local local_unnamed_addr global %struct.record* null, align 8
+@Int_Glob = dso_local local_unnamed_addr global i32 0, align 4
+
+define dso_local void @testcase(%struct.record** nocapture %Ptr_Ref_Par){
+; CHECK-O2-LABEL: testcase:
+; CHECK-O2:       // %bb.0: // %entry
+; CHECK-O2-NEXT:    adrp x8, Ptr_Glob
+; CHECK-O2-NEXT:    ldr x9, [x8, :lo12:Ptr_Glob]
+; CHECK-O2-NEXT:    cbz x9, .LBB0_2
+; CHECK-O2-NEXT:  // %bb.1: // %if.then
+; CHECK-O2-NEXT:    ldr x9, [x9]
+; CHECK-O2-NEXT:    str x9, [x0]
+; CHECK-O2-NEXT:    ldr x8, [x8, :lo12:Ptr_Glob]
+; CHECK-O2-NEXT:    b .LBB0_3
+; CHECK-O2-NEXT:  .LBB0_2:
+; CHECK-O2-NEXT:    mov x8, xzr
+; CHECK-O2-NEXT:  .LBB0_3: // %if.end
+; CHECK-O2-NEXT:    adrp x9, Int_Glob
+; CHECK-O2-NEXT:    ldr w1, [x9, :lo12:Int_Glob]
+; CHECK-O2-NEXT:    add x2, x8, #16 // =16
+; CHECK-O2-NEXT:    mov w0, #10
+; CHECK-O2-NEXT:    b externalfunc
+;
+; CHECK-O3-LABEL: testcase:
+; CHECK-O3:       // %bb.0: // %entry
+; CHECK-O3-NEXT:    adrp x8, Ptr_Glob
+; CHECK-O3-NEXT:    ldr x9, [x8, :lo12:Ptr_Glob]
+; CHECK-O3-NEXT:    cbz x9, .LBB0_2
+; CHECK-O3-NEXT:  // %bb.1: // %if.then
+; CHECK-O3-NEXT:    ldr x9, [x9]
+; CHECK-O3-NEXT:    str x9, [x0]
+; CHECK-O3-NEXT:    ldr x8, [x8, :lo12:Ptr_Glob]
+; CHECK-O3-NEXT:    adrp x9, Int_Glob
+; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:Int_Glob]
+; CHECK-O3-NEXT:    add x2, x8, #16 // =16
+; CHECK-O3-NEXT:    mov w0, #10
+; CHECK-O3-NEXT:    b externalfunc
+; CHECK-O3-NEXT:  .LBB0_2:
+; CHECK-O3-NEXT:    mov x8, xzr
+; CHECK-O3-NEXT:    adrp x9, Int_Glob
+; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:Int_Glob]
+; CHECK-O3-NEXT:    add x2, x8, #16 // =16
+; CHECK-O3-NEXT:    mov w0, #10
+; CHECK-O3-NEXT:    b externalfunc
+entry:
+  %0 = load %struct.record*, %struct.record** @Ptr_Glob, align 8
+  %cmp.not = icmp eq %struct.record* %0, null
+  br i1 %cmp.not, label %if.end, label %if.then
+
+if.then:                                          ; preds = %entry
+  %Ptr_Comp = getelementptr inbounds %struct.record, %struct.record* %0, i64 0, i32 0
+  %1 = load %struct.record*, %struct.record** %Ptr_Comp, align 8
+  store %struct.record* %1, %struct.record** %Ptr_Ref_Par, align 8
+  %.pre = load %struct.record*, %struct.record** @Ptr_Glob, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %entry
+  %2 = phi %struct.record* [ %.pre, %if.then ], [ null, %entry ]
+  %3 = load i32, i32* @Int_Glob, align 4
+  %Int_Comp = getelementptr inbounds %struct.record, %struct.record* %2, i64 0, i32 2, i32 0, i32 1
+  tail call void @externalfunc(i32 10, i32 %3, i32* nonnull %Int_Comp)
+  ret void
+}
+
+declare dso_local void @externalfunc(i32, i32, i32*)