diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -14,12 +14,15 @@
 #include "AArch64.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #define DEBUG_TYPE "aarch64-post-select-optimize"
 
@@ -42,6 +45,9 @@
 
 private:
   bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+  bool doPeepholeOpts(MachineBasicBlock &MBB);
+  /// Look for cross regclass copies that can be trivially eliminated.
+  bool foldSimpleCrossClassCopies(MachineInstr &MI);
 };
 } // end anonymous namespace
 
@@ -74,6 +80,68 @@
   }
 }
 
+/// Run the post-select peephole folds on each instruction of \p MBB.
+/// \returns true if anything in the block was changed.
+bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
+  bool Changed = false;
+  // Early-inc iteration, because a successful fold erases MI from the block.
+  for (auto &MI : make_early_inc_range(MBB))
+    Changed |= foldSimpleCrossClassCopies(MI);
+  return Changed;
+}
+
+/// Fold a full-register COPY between virtual registers whose classes differ
+/// but where one class contains the other, by rewriting all uses of the
+/// destination to the source. \returns true if \p MI was erased.
+bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
+  if (!MI.isCopy())
+    return false;
+
+  if (MI.getOperand(1).getSubReg())
+    return false; // Don't deal with subreg copies
+
+  Register Src = MI.getOperand(1).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+
+  if (Src.isPhysical() || Dst.isPhysical())
+    return false;
+
+  // Only look up the register info once we know MI is a candidate.
+  auto &MRI = MI.getMF()->getRegInfo();
+  const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+  if (SrcRC == DstRC)
+    return false;
+
+  if (SrcRC->hasSubClass(DstRC)) {
+    // This is the case where the source class is a superclass of the dest, so
+    // if the copy is the only user of the source, we can just constrain the
+    // source reg to the dest class.
+
+    if (!MRI.hasOneNonDBGUse(Src))
+      return false; // Only constrain single uses of the source.
+
+    // Constrain to dst reg class as long as it's not a weird class that only
+    // has a few registers.
+    if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
+      return false;
+  } else if (DstRC->hasSubClass(SrcRC)) {
+    // This is the inverse case, where the destination class is a superclass of
+    // the source. Nothing needs constraining: the source register is already
+    // in a class contained in the destination's, so users can take it as is.
+  } else {
+    return false;
+  }
+
+  MRI.replaceRegWith(Dst, Src);
+  // Src's live range now also covers the former uses of Dst, so existing kill
+  // flags may be stale; drop them to keep the liveness info conservative.
+  MRI.clearKillFlags(Src);
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
   // Consider the following code:
   // FCMPSrr %0, %1, implicit-def $nzcv
@@ -178,8 +246,10 @@
          "Expected a selected MF");
 
   bool Changed = false;
-  for (auto &BB : MF)
+  for (auto &BB : MF) {
     Changed |= optimizeNZCVDefs(BB);
+    Changed |= doPeepholeOpts(BB);
+  }
   return Changed;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir
@@ -0,0 +1,116 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+--- |
+  target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+  @x = external hidden local_unnamed_addr global i32*, align 8
+  define void @copy_from_larger_rc_def() { ret void }
+  define void @copy_from_larger_rc_def_multi_use() { ret void }
+  define void @copy_from_smaller_rc_def() { ret void }
+
+...
+---
+name: copy_from_larger_rc_def
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+body: |
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    ; The copy is the only user of %large_rc_def and goes from a larger rc (gpr64) to a
+    ; smaller one (gpr64common): expect the def to be constrained and the copy removed.
+    ; CHECK-LABEL: name: copy_from_larger_rc_def
+    ; CHECK: liveins: $w1, $x0, $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: %large_rc_def:gpr64common = UBFMXri [[COPY]], 61, 60
+    ; CHECK-NEXT: %add:gpr64sp = ADDXri %large_rc_def, 3, 0
+    ; CHECK-NEXT: $x0 = COPY %add
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:gpr64 = COPY $x0
+    %large_rc_def:gpr64 = UBFMXri %0, 61, 60
+    %constrain_copy:gpr64common = COPY %large_rc_def
+    ; Even though ADDXri may not actually need to use gpr64common, just use it as an example.
+    %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+    $x0 = COPY %add
+    RET_ReallyLR
+
+...
+---
+name: copy_from_larger_rc_def_multi_use
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+body: |
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    ; Don't constrain the def when it has users besides the copy (here, $x1 = COPY %large_rc_def).
+    ; CHECK-LABEL: name: copy_from_larger_rc_def_multi_use
+    ; CHECK: liveins: $w1, $x0, $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: %large_rc_def:gpr64 = UBFMXri [[COPY]], 61, 60
+    ; CHECK-NEXT: %constrain_copy:gpr64common = COPY %large_rc_def
+    ; CHECK-NEXT: %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+    ; CHECK-NEXT: %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
+    ; CHECK-NEXT: $x0 = COPY %add
+    ; CHECK-NEXT: $x1 = COPY %large_rc_def
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:gpr64 = COPY $x0
+    %large_rc_def:gpr64 = UBFMXri %0, 61, 60
+    %constrain_copy:gpr64common = COPY %large_rc_def
+    %add:gpr64sp = ADDXri %constrain_copy, 3, 0
+    %add2:gpr64sp = ADDXri %constrain_copy, 3, 0
+    $x0 = COPY %add
+    $x1 = COPY %large_rc_def
+    RET_ReallyLR
+
+...
+---
+name: copy_from_smaller_rc_def
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+  - { reg: '$x2' }
+body: |
+  bb.1:
+    liveins: $w1, $x0, $x2
+
+    ; The copy goes from a smaller rc (gpr64common) to a larger one (gpr64sp): expect its
+    ; user to read the smaller-rc def directly and the copy to be removed.
+    ; CHECK-LABEL: name: copy_from_smaller_rc_def
+    ; CHECK: liveins: $w1, $x0, $x2
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1
+    ; CHECK-NEXT: %add:gpr64common = ADDXri [[COPY1]], 3, 0
+    ; CHECK-NEXT: STRXui [[COPY1]], %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:gpr64common = COPY $x0
+    %1:gpr64common = COPY $x1
+
+    %add:gpr64common = ADDXri %1, 3, 0
+    %copy:gpr64sp = COPY %add
+    STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0))
+    RET_ReallyLR
+
+...