diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -64,6 +64,8 @@
     ProduceNonFlagSettingCondBr =
         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
     MFReturnAddr = Register();
+
+    processPHIs(MF);
   }
 
 private:
@@ -78,6 +80,9 @@
   // An early selection function that runs before the selectImpl() call.
   bool earlySelect(MachineInstr &I) const;
 
+  // Do some preprocessing of G_PHIs before we begin selection.
+  void processPHIs(MachineFunction &MF);
+
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
@@ -5327,6 +5332,105 @@
   }
 }
 
+
+// Perform fixups on the given PHI instruction's operands to force them all
+// to be the same as the destination regbank. Each operand living on a
+// different bank is replaced by a COPY onto the destination bank, placed
+// after the operand's definition.
+static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
+                            const AArch64RegisterBankInfo &RBI) {
+  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
+  Register DstReg = MI.getOperand(0).getReg();
+  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
+  assert(DstRB && "Expected PHI dst to have regbank assigned");
+  MachineIRBuilder MIB(MI);
+
+  // Go through each operand and ensure it has the same regbank.
+  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
+    MachineOperand &MO = MI.getOperand(OpIdx);
+    if (!MO.isReg())
+      continue;
+    Register OpReg = MO.getReg();
+    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
+    if (RB != DstRB) {
+      // Insert a cross-bank copy.
+      auto *OpDef = MRI.getVRegDef(OpReg);
+      const LLT &Ty = MRI.getType(OpReg);
+      MachineBasicBlock &OpDefBB = *OpDef->getParent();
+
+      // Any instruction we insert must appear after all PHIs in the block
+      // for the block to be valid MIR, so if the def is itself followed by
+      // a PHI, insert at the first non-PHI instruction instead.
+      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
+      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
+        InsertPt = OpDefBB.getFirstNonPHI();
+      MIB.setInsertPt(OpDefBB, InsertPt);
+      auto Copy = MIB.buildCopy(Ty, OpReg);
+      MRI.setRegBank(Copy.getReg(0), *DstRB);
+      MO.setReg(Copy.getReg(0));
+    }
+  }
+}
+
+void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
+  // We're looking for PHIs, build a list so we don't invalidate iterators.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SmallVector<MachineInstr *, 32> Phis;
+  for (auto &BB : MF) {
+    for (auto &MI : BB) {
+      if (MI.getOpcode() == TargetOpcode::G_PHI)
+        Phis.emplace_back(&MI);
+    }
+  }
+
+  for (auto *MI : Phis) {
+    // We need to do some work here if the operand types are < 32 bit and they
+    // are split across fpr/gpr banks. Since all types <32b on gpr
+    // end up being assigned gpr32 regclasses, we can end up with PHIs here
+    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
+    // be selecting heterogeneous regbanks for operands if possible, but we
+    // still need to be able to deal with it here.
+    //
+    // To fix this, if we have a gpr-bank operand < 32b in size and at least
+    // one other operand is on the fpr bank, then we add cross-bank copies
+    // to homogenize the operand banks. For simplicity the bank that we choose
+    // to settle on is whatever bank the def operand has. For example:
+    //
+    // %endbb:
+    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
+    //  =>
+    // %bb2:
+    //   ...
+    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
+    //   ...
+    // %endbb:
+    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
+    bool HasGPROp = false, HasFPROp = false;
+    for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
+      const auto &MO = MI->getOperand(OpIdx);
+      if (!MO.isReg())
+        continue;
+      const LLT &Ty = MRI.getType(MO.getReg());
+      if (!Ty.isValid() || !Ty.isScalar())
+        break;
+      if (Ty.getSizeInBits() >= 32)
+        break;
+      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
+      // If for some reason we don't have a regbank yet, stop scanning.
+      if (!RB)
+        break;
+
+      if (RB->getID() == AArch64::GPRRegBankID)
+        HasGPROp = true;
+      else
+        HasFPROp = true;
+    }
+    // We have heterogeneous regbanks, need to fixup.
+    if (HasGPROp && HasFPROp)
+      fixupPHIOpBanks(*MI, MRI, RBI);
+  }
+}
+
 namespace llvm {
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/preselect-process-phis.mir
@@ -0,0 +1,110 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- -run-pass=instruction-select -global-isel %s -o - | FileCheck %s
+---
+name:            test_loop_phi_fpr_to_gpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+liveins:         []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: test_loop_phi_fpr_to_gpr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
+  ; CHECK:   [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:gpr32 = PHI [[CSELWr]], %bb.1, %8, %bb.2
+  ; CHECK:   [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, [[FCVTHSr]], %subreg.hsub
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr32all = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   STRHHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
+  ; CHECK:   B %bb.2
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %0:gpr(s1) = G_IMPLICIT_DEF
+    %4:gpr(p0) = G_IMPLICIT_DEF
+    %8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    %6:gpr(s32) = G_IMPLICIT_DEF
+    %7:gpr(s32) = G_SELECT %0(s1), %6, %6
+    %1:gpr(s16) = G_TRUNC %7(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000)
+
+    %3:gpr(s16) = G_PHI %1(s16), %bb.1, %5(s16), %bb.2
+    %5:fpr(s16) = G_FPTRUNC %8(s32)
+    G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
+    G_BR %bb.2
+
+...
+---
+name:            test_loop_phi_gpr_to_fpr
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+liveins:         []
+machineFunctionInfo: {}
+body:             |
+  ; CHECK-LABEL: name: test_loop_phi_gpr_to_fpr
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   [[DEF:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   [[DEF1:%[0-9]+]]:gpr64common = IMPLICIT_DEF
+  ; CHECK:   [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 2143289344
+  ; CHECK:   [[COPY:%[0-9]+]]:fpr32 = COPY [[MOVi32imm]]
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[DEF2:%[0-9]+]]:gpr32 = IMPLICIT_DEF
+  ; CHECK:   $wzr = ANDSWri [[DEF]], 0, implicit-def $nzcv
+  ; CHECK:   [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[DEF2]], [[DEF2]], 1, implicit $nzcv
+  ; CHECK:   [[COPY1:%[0-9]+]]:fpr32 = COPY [[CSELWr]]
+  ; CHECK:   [[COPY2:%[0-9]+]]:fpr16 = COPY [[COPY1]].hsub
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[PHI:%[0-9]+]]:fpr16 = PHI %7, %bb.2, [[COPY2]], %bb.1
+  ; CHECK:   [[FCVTHSr:%[0-9]+]]:fpr16 = FCVTHSr [[COPY]]
+  ; CHECK:   STRHui [[PHI]], [[DEF1]], 0 :: (store 2 into `half* undef`)
+  ; CHECK:   B %bb.2
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %0:gpr(s1) = G_IMPLICIT_DEF
+    %4:gpr(p0) = G_IMPLICIT_DEF
+    %8:fpr(s32) = G_FCONSTANT float 0x7FF8000000000000
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    %6:gpr(s32) = G_IMPLICIT_DEF
+    %7:gpr(s32) = G_SELECT %0(s1), %6, %6
+    %1:gpr(s16) = G_TRUNC %7(s32)
+
+  bb.2:
+    successors: %bb.2(0x80000000)
+
+    %3:fpr(s16) = G_PHI %5(s16), %bb.2, %1(s16), %bb.1
+    %5:fpr(s16) = G_FPTRUNC %8(s32)
+    G_STORE %3(s16), %4(p0) :: (store 2 into `half* undef`)
+    G_BR %bb.2
+
+...