diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1408,6 +1408,9 @@
                                    SDValue Addr, SelectionDAG &DAG)
                                    const override;
 
+    /// Return the preferred alignment for the loop \p ML.
+    Align getPrefLoopAlignment(MachineLoop *ML) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35,6 +35,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -76,6 +77,14 @@
         " of the loop header PC will be 0)."),
     cl::Hidden);
 
+static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
+    "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
+    cl::desc(
+        "Sets the preferable loop alignment for experiments (as log2 bytes) "
+        "for innermost loops only. If specified, this option overrides "
+        "alignment set by x86-experimental-pref-loop-alignment."),
+    cl::Hidden);
+
 static cl::opt<bool> MulConstantOptimization(
     "mul-constant-optimization", cl::init(true),
     cl::desc("Replace 'mul x, Const' with more effective instructions like "
@@ -51696,3 +51705,16 @@
         .getAsInteger(0, StackProbeSize);
   return StackProbeSize;
 }
+
+/// Returns the preferred alignment for the loop \p ML. An innermost loop uses
+/// the value of -x86-experimental-pref-innermost-loop-alignment (given as log2
+/// bytes) when that option appears on the command line; every other case
+/// defers to TargetLowering::getPrefLoopAlignment().
+Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
+  // The base-class query is callable without a loop (see the fallback below),
+  // so a caller may pass a null ML; check it before dereferencing.
+  if (ML && ML->isInnermost() &&
+      ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
+    return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
+  return TargetLowering::getPrefLoopAlignment();
+}
diff --git a/llvm/test/CodeGen/X86/innermost-loop-alignment.ll b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/innermost-loop-alignment.ll
@@ -0,0 +1,59 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s -check-prefix=DEFAULT
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-innermost-loop-alignment=5 | FileCheck %s -check-prefix=ALIGN32
+; RUN: llc < %s -mtriple=i686-pc-linux-gnu -x86-experimental-pref-loop-alignment=5 -x86-experimental-pref-innermost-loop-alignment=6 | FileCheck %s -check-prefix=ALIGN64
+
+declare void @foo()
+
+define void @test(i32 %n, i32 %m) {
+; DEFAULT-LABEL: test:
+; DEFAULT: .p2align 4, 0x90
+; DEFAULT-NEXT: .LBB0_1: # %outer
+; DEFAULT-NEXT: # =>This Loop Header: Depth=1
+; DEFAULT-NEXT: # Child Loop BB0_2 Depth 2
+; DEFAULT: .p2align 4, 0x90
+; DEFAULT-NEXT: .LBB0_2: # %inner
+; DEFAULT-NEXT: # Parent Loop BB0_1 Depth=1
+
+; ALIGN32-LABEL: test:
+; ALIGN32: .p2align 4, 0x90
+; ALIGN32-NEXT: .LBB0_1: # %outer
+; ALIGN32-NEXT: # =>This Loop Header: Depth=1
+; ALIGN32-NEXT: # Child Loop BB0_2 Depth 2
+; ALIGN32: .p2align 5, 0x90
+; ALIGN32-NEXT: .LBB0_2: # %inner
+; ALIGN32-NEXT: # Parent Loop BB0_1 Depth=1
+; ALIGN32-NEXT: # => This Inner Loop Header: Depth=2
+
+; ALIGN64-LABEL: test:
+; ALIGN64: .p2align 5, 0x90
+; ALIGN64-NEXT: .LBB0_1: # %outer
+; ALIGN64-NEXT: # =>This Loop Header: Depth=1
+; ALIGN64-NEXT: # Child Loop BB0_2 Depth 2
+; ALIGN64: .p2align 6, 0x90
+; ALIGN64-NEXT: .LBB0_2: # %inner
+; ALIGN64-NEXT: # Parent Loop BB0_1 Depth=1
+; ALIGN64-NEXT: # => This Inner Loop Header: Depth=2
+
+entry:
+  br label %outer
+
+outer:
+  %outer.iv = phi i32 [0, %entry], [%outer.iv.next, %outer_bb]
+  br label %inner
+
+inner:
+  %inner.iv = phi i32 [0, %outer], [%inner.iv.next, %inner]
+  call void @foo()
+  %inner.iv.next = add i32 %inner.iv, 1
+  %inner.cond = icmp ne i32 %inner.iv.next, %m
+  br i1 %inner.cond, label %inner, label %outer_bb
+
+outer_bb:
+  %outer.iv.next = add i32 %outer.iv, 1
+  %outer.cond = icmp ne i32 %outer.iv.next, %n
+  br i1 %outer.cond, label %outer, label %exit
+
+exit:
+  ret void
+}