diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -14,6 +14,7 @@ #include "AArch64.h" #include "AArch64TargetMachine.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineOperand.h" @@ -93,7 +94,12 @@ // Our solution here is to try to convert flag setting operations between // a interval of identical FCMPs, so that CSE will be able to eliminate one. bool Changed = false; - const auto *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + auto &MF = *MBB.getParent(); + auto &Subtarget = MF.getSubtarget(); + const auto &TII = Subtarget.getInstrInfo(); + auto TRI = Subtarget.getRegisterInfo(); + auto RBI = Subtarget.getRegBankInfo(); + auto &MRI = MF.getRegInfo(); // The first step is to find the first and last FCMPs. If we have found // at least two, then set the limit of the bottom-up walk to the first FCMP @@ -144,6 +150,11 @@ << II); II.setDesc(TII->get(NewOpc)); II.RemoveOperand(DeadNZCVIdx); + // Changing the opcode can result in differing regclass requirements, + // e.g. SUBSWri uses gpr32 for the dest, whereas SUBWri uses gpr32sp. + // Constrain the regclasses, possibly introducing a copy. + constrainOperandRegClass(MF, *TRI, MRI, *TII, *RBI, II, II.getDesc(), + II.getOperand(0), 0); Changed |= true; } else { // Otherwise, we just set the nzcv imp-def operand to be dead, so the diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-constrain-new-regop.mir @@ -0,0 +1,83 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s +--- +name: pluto +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } + - { reg: '$x2' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +body: | + ; CHECK-LABEL: name: pluto + ; CHECK: bb.0: + ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK: liveins: $w1, $x0, $x2 + ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:gpr32sp = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:gpr64sp = COPY $x2 + ; CHECK: [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF + ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 872415232 + ; CHECK: [[COPY4:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]] + ; CHECK: FCMPSrr [[COPY3]], [[COPY4]], implicit-def $nzcv + ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + ; CHECK: [[SUBWri:%[0-9]+]]:gpr32common = SUBWri [[COPY1]], 1, 0 + ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, [[SUBWri]], %subreg.sub_32 + ; CHECK: [[COPY5:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK: FCMPSrr [[COPY5]], [[COPY4]], implicit-def $nzcv + ; CHECK: [[CSINCWr1:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + ; CHECK: [[EORWrr:%[0-9]+]]:gpr32 = EORWrr [[CSINCWr]], [[CSINCWr1]] + ; CHECK: TBNZW [[EORWrr]], 0, %bb.2 + ; CHECK: B %bb.1 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBREG_TO_REG]], 60, 59 + ; CHECK: [[LDRSroX:%[0-9]+]]:fpr32 = LDRSroX [[COPY]], [[UBFMXri]], 0, 0 :: (load 4) + ; CHECK: [[COPY6:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK: [[FMULSrr:%[0-9]+]]:fpr32 = FMULSrr [[COPY6]], [[LDRSroX]] + ; CHECK: [[COPY7:%[0-9]+]]:fpr32 = COPY [[DEF]] + ; CHECK: [[FADDSrr:%[0-9]+]]:fpr32 = FADDSrr [[FMULSrr]], [[COPY7]] + ; CHECK: STRSui [[FADDSrr]], [[COPY2]], 0 :: (store 4) + ; CHECK: bb.2: + ; CHECK: RET_ReallyLR + bb.1: + liveins: $w1, $x0, $x2 + + %0:gpr64sp = COPY $x0 + %1:gpr32sp = COPY $w1 + %2:gpr64sp = COPY $x2 + %3:gpr32 = IMPLICIT_DEF + %29:fpr32 = COPY %3 + %33:gpr32 = MOVi32imm 872415232 + %4:fpr32 = COPY %33 + FCMPSrr %29, %4, implicit-def $nzcv + %28:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv + %7:gpr32 = SUBSWri %1, 1, 0, implicit-def $nzcv + %8:gpr64 = SUBREG_TO_REG 0, %7, %subreg.sub_32 + %30:fpr32 = COPY %3 + FCMPSrr %30, %4, implicit-def $nzcv + %27:gpr32 = CSINCWr $wzr, $wzr, 12, implicit $nzcv + %26:gpr32 = EORWrr %28, %27 + TBNZW %26, 0, %bb.3 + B %bb.2 + + bb.2: + %12:gpr64 = UBFMXri %8, 60, 59 + %15:fpr32 = LDRSroX %0, %12, 0, 0 :: (load 4) + %31:fpr32 = COPY %3 + %16:fpr32 = FMULSrr %31, %15 + %32:fpr32 = COPY %3 + %17:fpr32 = FADDSrr %16, %32 + STRSui %17, %2, 0 :: (store 4) + + bb.3: + RET_ReallyLR + +...