diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -14,12 +14,15 @@
 #include "AArch64.h"
 #include "AArch64TargetMachine.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 
 #define DEBUG_TYPE "aarch64-post-select-optimize"
 
@@ -42,6 +45,9 @@
 
 private:
   bool optimizeNZCVDefs(MachineBasicBlock &MBB);
+  bool doPeepholeOpts(MachineBasicBlock &MBB);
+  bool foldSimpleCrossClassCopies(MachineInstr &MI);
+  bool setKillFlags(MachineInstr &MI);
 };
 } // end anonymous namespace
 
@@ -74,6 +80,96 @@
   }
 }
 
+/// Run the peephole optimizations on every instruction of \p MBB.
+/// \returns true if any instruction was changed or erased.
+bool AArch64PostSelectOptimize::doPeepholeOpts(MachineBasicBlock &MBB) {
+  bool Changed = false;
+  for (MachineInstr &MI : make_early_inc_range(MBB)) {
+    // A successful fold erases MI, so MI must not be touched afterwards.
+    Changed |= foldSimpleCrossClassCopies(MI) || setKillFlags(MI);
+  }
+  return Changed;
+}
+
+/// Look for cross regclass copies that can be trivially eliminated.
+/// \returns true if \p MI was folded away, in which case it has been erased.
+bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
+  if (!MI.isCopy())
+    return false;
+
+  if (MI.getOperand(1).getSubReg())
+    return false; // Don't deal with subreg copies
+
+  Register Src = MI.getOperand(1).getReg();
+  Register Dst = MI.getOperand(0).getReg();
+
+  if (!Src.isVirtual() || !Dst.isVirtual())
+    return false;
+
+  auto &MRI = MI.getMF()->getRegInfo();
+  const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+  const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+
+  if (SrcRC == DstRC)
+    return false;
+
+  if (SrcRC->hasSubClass(DstRC)) {
+    // This is the case where the source class is a superclass of the dest, so
+    // if the copy is the only user of the source, we can just constrain the
+    // source reg to the dest class.
+    if (!MRI.hasOneNonDBGUse(Src))
+      return false; // Only constrain single uses of the source.
+
+    // Constrain to dst reg class as long as it's not a weird class that only
+    // has a few registers.
+    if (!MRI.constrainRegClass(Src, DstRC, /* MinNumRegs */ 25))
+      return false;
+  } else if (DstRC->hasSubClass(SrcRC)) {
+    // This is the inverse case, where the destination class is a superclass of
+    // the source. Every user of Dst already accepts any register of DstRC, so
+    // it can read Src directly and no register class has to change.
+  } else {
+    return false;
+  }
+
+  MRI.replaceRegWith(Dst, Src);
+  MI.eraseFromParent();
+  return true;
+}
+
+/// Mark virtual register uses of \p MI as killed when \p MI is the register's
+/// only non-debug user and the register is defined in the same block.
+bool AArch64PostSelectOptimize::setKillFlags(MachineInstr &MI) {
+  if (MI.isPHI() || MI.isCopy() || MI.isDebugInstr())
+    return false;
+
+  auto &MRI = MI.getMF()->getRegInfo();
+  const MCInstrDesc &MCID = MI.getDesc();
+  bool Changed = false;
+  for (unsigned Idx = 0; Idx < MI.getNumExplicitOperands(); ++Idx) {
+    auto &Op = MI.getOperand(Idx);
+    if (!Op.isReg() || Op.isDef())
+      continue;
+
+    Register Reg = Op.getReg();
+    if (!Reg.isVirtual() || !MRI.hasOneNonDBGUse(Reg))
+      continue;
+
+    // Don't set kill flags on tied operands.
+    if (MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1)
+      continue;
+
+    // Only block-local values are handled; skip registers without a def.
+    const MachineInstr *Def = MRI.getVRegDef(Reg);
+    if (!Def || Def->getParent() != MI.getParent())
+      continue;
+
+    Op.setIsKill();
+    Changed = true;
+  }
+  return Changed;
+}
+
 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
   // Consider the following code:
   // FCMPSrr %0, %1, implicit-def $nzcv
@@ -178,8 +274,10 @@
          "Expected a selected MF");
 
   bool Changed = false;
-  for (auto &BB : MF)
+  for (auto &BB : MF) {
     Changed |= optimizeNZCVDefs(BB);
+    Changed |= doPeepholeOpts(BB);
+  }
   return Changed;
 }
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir
@@ -17,36 +17,40 @@
 body: |
   ; CHECK-LABEL: name: pluto
   ; CHECK: bb.0:
-  ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
-  ; CHECK: liveins: $w1, $x0, $x2
-  ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
-  ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY $w1
-  ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY $x2
-  ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
-  ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[DEF]]
-  ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 872415232
-  ; CHECK: [[COPY4:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
-  ; CHECK: FCMPSrr [[COPY3]], [[COPY4]], implicit-def $nzcv
-  ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
-  ; CHECK: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY1]], 1, 0
-  ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWri]], %subreg.sub_32
-  ; CHECK: [[COPY5:%[0-9]+]]:fpr32 = COPY [[DEF]]
-  ; CHECK: FCMPSrr [[COPY5]], [[COPY4]], implicit-def $nzcv
-  ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv
-  ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[CSINCWr]], [[CSINCWr1]]
-  ; CHECK: TBNZW [[EORWrr]], 0, %bb.2
- 
; CHECK: B %bb.1 - ; CHECK: bb.1: - ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 59 - ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load (s32)) - ; CHECK: [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]] - ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]] - ; CHECK: [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]] - ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]] - ; CHECK: STRSui [[FADDSrr]], [[COPY2]], 0 :: (store (s32)) - ; CHECK: bb.2: - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32sp = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64sp = COPY $x2 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 872415232 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]] + ; CHECK-NEXT: FCMPSrr killed [[COPY3]], [[COPY4]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK-NEXT: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri killed [[COPY1]], 1, 0 + ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, killed [[SUBWri]], %subreg.sub_32 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK-NEXT: FCMPSrr killed [[COPY5]], [[COPY4]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK-NEXT: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr killed [[CSINCWr]], killed [[CSINCWr1]] + ; CHECK-NEXT: TBNZW killed [[EORWrr]], 0, %bb.2 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 
59 + ; CHECK-NEXT: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], killed [[UBFMXri]], 0, 0 :: (load (s32)) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK-NEXT: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr killed [[COPY6]], killed [[LDRSroX]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK-NEXT: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr killed [[FMULSrr]], killed [[COPY7]] + ; CHECK-NEXT: STRSui killed [[FADDSrr]], [[COPY2]], 0 :: (store (s32)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: RET_ReallyLR bb.1: liveins: $w1, $x0, $x2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir @@ -16,19 +16,20 @@ ; CHECK-LABEL: name: test_fcmp_dead_cc ; CHECK: liveins: $w1, $x0, $s0, $s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv - ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: 
FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr killed [[COPY1]], killed [[COPY4]] + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri killed [[SUBWrr]], 1, 31 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr killed [[UBFMWri]], killed [[MOVi32imm]], 8, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:gpr64 = COPY $x0 %2:gpr32 = COPY $w1 %3:fpr32 = COPY $s0 @@ -60,19 +61,20 @@ ; CHECK-LABEL: name: test_fcmp_64_dead_cc ; CHECK: liveins: $w1, $x0, $d0, $d1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv - ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr killed [[COPY1]], killed [[COPY4]] + ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri killed [[SUBWrr]], 1, 31 + ; CHECK-NEXT: 
[[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr killed [[UBFMWri]], killed [[MOVi32imm]], 8, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:gpr64 = COPY $x0 %2:gpr32 = COPY $w1 %3:fpr64 = COPY $d0 @@ -104,21 +106,21 @@ ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps ; CHECK: liveins: $w1, $x0, $s0, $s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv - ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv - ; CHECK: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv + ; CHECK-NEXT: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri killed [[SUBWrr]], 1, 31 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr 
killed [[UBFMWri]], killed [[SUBWrr1]], 8, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:gpr64 = COPY $x0 %2:gpr32 = COPY $w1 %3:fpr32 = COPY $s0 @@ -127,11 +129,10 @@ FCMPSrr %3, %4, implicit-def $nzcv %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv FCMPSrr %3, %4, implicit-def $nzcv - %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + %13:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv FCMPSrr %3, %4, implicit-def $nzcv %14:gpr32common = UBFMWri %12, 1, 31 - %60:gpr32 = MOVi32imm 1 - %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + %16:gpr32 = CSELWr %14, %13, 8, implicit $nzcv $w0 = COPY %16 RET_ReallyLR implicit $w0 @@ -152,19 +153,20 @@ ; Check that any dead imp-defs of NZCV are marked as such. ; CHECK-LABEL: name: test_impdef_made_dead ; CHECK: liveins: $w1, $x0, $s0, $s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY4]], implicit-def dead $nzcv - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: %ret:gpr32 = SUBSWrr [[CSELWr]], [[SUBSWrr]], implicit-def dead $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr killed [[COPY1]], killed [[COPY4]], implicit-def dead $nzcv + ; CHECK-NEXT: FCMPSrr 
killed [[COPY2]], killed [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr killed [[UBFMWri]], killed [[MOVi32imm]], 8, implicit $nzcv + ; CHECK-NEXT: %ret:gpr32 = SUBSWrr [[CSELWr]], [[SUBSWrr]], implicit-def dead $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %1:gpr64 = COPY $x0 %2:gpr32 = COPY $w1 %3:fpr32 = COPY $s0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-killflags.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-killflags.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-killflags.mir @@ -0,0 +1,94 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: set_kill_flags +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$x2' } +body: | + bb.1: + liveins: $w1, $x0, $x2 + + ; CHECK-LABEL: name: set_kill_flags + ; CHECK: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: %def:gpr64common = UBFMXri killed [[COPY]], 61, 60 + ; CHECK-NEXT: %add:gpr64common = ADDXri killed %def, 3, 0 + ; CHECK-NEXT: $x0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR + %0:gpr64 = COPY $x0 + %def:gpr64common = UBFMXri %0, 61, 60 + %add:gpr64common = ADDXri %def, 3, 0 + $x0 = COPY %add + RET_ReallyLR + +... +--- +name: no_set_kill_flags_tied +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + ; We shouldn't mark %val2 as killed here since it's tied to %res. 
+ ; CHECK-LABEL: name: no_set_kill_flags_tied + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %val:gpr64sp = COPY $x0 + ; CHECK-NEXT: %val2:gpr64sp = COPY $x1 + ; CHECK-NEXT: %res:gpr64sp = STGPreIndex killed %val, %val2, 2 + ; CHECK-NEXT: RET_ReallyLR + %val:gpr64sp = COPY $x0 + %val2:gpr64sp = COPY $x1 + %res:gpr64sp = STGPreIndex %val, %val2, 2 + RET_ReallyLR + +... +--- +name: set_kill_flags_different_block +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$x2' } +body: | + ; CHECK-LABEL: name: set_kill_flags_different_block + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: %def:gpr64common = UBFMXri killed [[COPY]], 61, 60 + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: %add:gpr64common = ADDXri %def, 3, 0 + ; CHECK-NEXT: $x0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR + bb.1: + liveins: $w1, $x0, $x2 + + %0:gpr64 = COPY $x0 + %def:gpr64common = UBFMXri %0, 61, 60 + B %bb.2 + bb.2: + %add:gpr64common = ADDXri %def, 3, 0 + $x0 = COPY %add + RET_ReallyLR + +... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-xclass-copies.mir @@ -0,0 +1,116 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + + @x = external hidden local_unnamed_addr global i32*, align 8 + define void @copy_from_larger_rc_def() { ret void } + define void @copy_from_larger_rc_def_multi_use() { ret void } + define void @copy_from_smaller_rc_def() { ret void } + +... +--- +name: copy_from_larger_rc_def +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$x2' } +body: | + bb.1: + liveins: $w1, $x0, $x2 + + ; Show that if we're doing a copy from a large rc to a single user with a smaller rc + ; then we just constrain the def instead. + ; CHECK-LABEL: name: copy_from_larger_rc_def + ; CHECK: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: %large_rc_def:gpr64common = UBFMXri killed [[COPY]], 61, 60 + ; CHECK-NEXT: %add:gpr64sp = ADDXri killed %large_rc_def, 3, 0 + ; CHECK-NEXT: $x0 = COPY %add + ; CHECK-NEXT: RET_ReallyLR + %0:gpr64 = COPY $x0 + %large_rc_def:gpr64 = UBFMXri %0, 61, 60 + %constrain_copy:gpr64common = COPY %large_rc_def + ; Even though ADDXri may not actually need to use gpr64common, just use it as an example. + %add:gpr64sp = ADDXri %constrain_copy, 3, 0 + $x0 = COPY %add + RET_ReallyLR + +... 
+--- +name: copy_from_larger_rc_def_multi_use +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$x2' } +body: | + bb.1: + liveins: $w1, $x0, $x2 + + ; Don't constrain def if the original def has multiple users. + ; CHECK-LABEL: name: copy_from_larger_rc_def_multi_use + ; CHECK: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: %large_rc_def:gpr64 = UBFMXri killed [[COPY]], 61, 60 + ; CHECK-NEXT: %constrain_copy:gpr64common = COPY %large_rc_def + ; CHECK-NEXT: %add:gpr64sp = ADDXri %constrain_copy, 3, 0 + ; CHECK-NEXT: %add2:gpr64sp = ADDXri %constrain_copy, 3, 0 + ; CHECK-NEXT: $x0 = COPY %add + ; CHECK-NEXT: $x1 = COPY %large_rc_def + ; CHECK-NEXT: RET_ReallyLR + %0:gpr64 = COPY $x0 + %large_rc_def:gpr64 = UBFMXri %0, 61, 60 + %constrain_copy:gpr64common = COPY %large_rc_def + %add:gpr64sp = ADDXri %constrain_copy, 3, 0 + %add2:gpr64sp = ADDXri %constrain_copy, 3, 0 + $x0 = COPY %add + $x1 = COPY %large_rc_def + RET_ReallyLR + +... +--- +name: copy_from_smaller_rc_def +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$x2' } +body: | + bb.1: + liveins: $w1, $x0, $x2 + + ; Show that if we're doing a copy from a small rc to a single user with a larger rc + ; then we just use the smaller def instead of doing a copy. 
+ ; CHECK-LABEL: name: copy_from_smaller_rc_def + ; CHECK: liveins: $w1, $x0, $x2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x1 + ; CHECK-NEXT: %add:gpr64common = ADDXri [[COPY1]], 3, 0 + ; CHECK-NEXT: STRXui [[COPY1]], killed %add, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0)) + ; CHECK-NEXT: RET_ReallyLR + %0:gpr64common = COPY $x0 + %1:gpr64common = COPY $x1 + + %add:gpr64common = ADDXri %1, 3, 0 + %copy:gpr64sp = COPY %add + STRXui %1, %copy, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store (p0)) + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/kcfi.ll b/llvm/test/CodeGen/AArch64/kcfi.ll --- a/llvm/test/CodeGen/AArch64/kcfi.ll +++ b/llvm/test/CodeGen/AArch64/kcfi.ll @@ -26,8 +26,8 @@ ; MIR-LABEL: name: f1 ; MIR: body: -; ISEL: BLR %0, csr_aarch64_aapcs,{{.*}} cfi-type 12345678 -; ISEL-SLS: BLRNoIP %0, csr_aarch64_aapcs,{{.*}} cfi-type 12345678 +; ISEL: BLR {{.*}}, csr_aarch64_aapcs,{{.*}} cfi-type 12345678 +; ISEL-SLS: BLRNoIP {{.*}}, csr_aarch64_aapcs,{{.*}} cfi-type 12345678 ; KCFI: BUNDLE{{.*}} { ; KCFI-NEXT: KCFI_CHECK $x0, 12345678, implicit-def $x9, implicit-def $x16, implicit-def $x17, implicit-def $nzcv @@ -61,7 +61,7 @@ ; MIR-LABEL: name: f2 ; MIR: body: -; ISEL: TCRETURNri %0, 0, csr_aarch64_aapcs, implicit $sp, cfi-type 12345678 +; ISEL: TCRETURNri {{.*}}, 0, csr_aarch64_aapcs, implicit $sp, cfi-type 12345678 ; KCFI: BUNDLE{{.*}} { ; KCFI-NEXT: KCFI_CHECK $x0, 12345678, implicit-def $x9, implicit-def $x16, implicit-def $x17, implicit-def $nzcv