diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -65,6 +65,10 @@
     CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
                     cl::desc("Threshold for the size of CSUses"));
 
+static cl::opt<bool> AggressiveMachineCSE(
+    "aggressive-machine-cse", cl::Hidden, cl::init(false),
+    cl::desc("Override the profitability heuristics for Machine CSE"));
+
 namespace {
 
 class MachineCSE : public MachineFunctionPass {
@@ -439,6 +443,11 @@
 /// defined.
 bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
                                    MachineBasicBlock *CSBB, MachineInstr *MI) {
+  // -aggressive-machine-cse: skip the heuristics in the rest of this function
+  // and report the CSE as profitable.
+  if (AggressiveMachineCSE)
+    return true;
+
   // FIXME: Heuristics that works around the lack the live range splitting.
 
   // If CSReg is used at all uses of Reg, CSE should not increase register
diff --git a/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll b/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-cse-profitable-check.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple aarch64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK-BASE
+; RUN: llc -mtriple aarch64-none-linux-gnu -aggressive-machine-cse < %s | FileCheck %s --check-prefixes=CHECK-AGGRESSIVE-CSE
+
+define void @foo(ptr %buf, <8 x i16> %a) {
+; CHECK-BASE-LABEL: foo:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-BASE-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-BASE-NEXT:    zip2 v2.8h, v0.8h, v2.8h
+; CHECK-BASE-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-BASE-NEXT:    st2 { v0.4h, v1.4h }, [x0], #16
+; CHECK-BASE-NEXT:    str q2, [x0]
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-AGGRESSIVE-CSE-LABEL: foo:
+; CHECK-AGGRESSIVE-CSE:       // %bb.0: // %entry
+; CHECK-AGGRESSIVE-CSE-NEXT:    // kill: def $q0 killed $q0 def $q0_q1
+; CHECK-AGGRESSIVE-CSE-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-AGGRESSIVE-CSE-NEXT:    st2 { v0.4h, v1.4h }, [x0], #16
+; CHECK-AGGRESSIVE-CSE-NEXT:    zip2 v0.8h, v0.8h, v1.8h
+; CHECK-AGGRESSIVE-CSE-NEXT:    str q0, [x0]
+; CHECK-AGGRESSIVE-CSE-NEXT:    ret
+entry:
+  %vzip.i = shufflevector <8 x i16> %a, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 poison, i16 poison, i16 poison, i16 poison>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
+  %vzip1.i = shufflevector <8 x i16> %a, <8 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  store <8 x i16> %vzip.i, ptr %buf, align 4
+  %add.ptr = getelementptr inbounds i32, ptr %buf, i64 4
+  store <8 x i16> %vzip1.i, ptr %add.ptr, align 4
+  ret void
+}