Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -356,14 +356,14 @@
   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                        bool IsLoadStore = false) const;
 
-  /// Instructions that accept extend modifiers like UXTW expect the register
-  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
-  /// subregister copy if necessary. Return either ExtReg, or the result of the
-  /// new copy.
-  Register narrowExtendRegIfNeeded(Register ExtReg,
-                                   MachineIRBuilder &MIB) const;
-  Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
-                                   MachineIRBuilder &MIB) const;
+  /// If necessary, emit a copy to move \p Reg into a register belonging to
+  /// \p RC.
+  ///
+  /// \returns \p Reg if no copy was needed, or the result of the new copy.
+  Register narrowOrWidenScalarIfNeeded(Register Reg,
+                                       const TargetRegisterClass &RC,
+                                       MachineIRBuilder &MIB) const;
+
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
 
   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1353,10 +1353,10 @@
   // TBNZW work.
   bool UseWReg = Bit < 32;
   unsigned NecessarySize = UseWReg ? 32 : 64;
-  if (Size < NecessarySize)
-    TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
-  else if (Size > NecessarySize)
-    TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
+  if (Size != NecessarySize)
+    TestReg = narrowOrWidenScalarIfNeeded(
+        TestReg,
+        UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, MIB);
 
   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                           {AArch64::TBZW, AArch64::TBNZW}};
@@ -5152,7 +5152,7 @@
     // Need a 32-bit wide register here.
     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
-    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+    OffsetReg = narrowOrWidenScalarIfNeeded(OffsetReg, AArch64::GPR32RegClass, MIB);
   }
 
   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
@@ -5372,8 +5372,8 @@
   // Need a 32-bit wide register.
   MachineIRBuilder MIB(*PtrAdd);
-  Register ExtReg =
-      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+  Register ExtReg = narrowOrWidenScalarIfNeeded(OffsetInst->getOperand(1).getReg(),
+                                                AArch64::GPR32RegClass, MIB);
   unsigned SignExtend = Ext == AArch64_AM::SXTW;
 
   // Base is LHS, offset is ExtReg.
@@ -5647,67 +5647,22 @@
   }
 }
 
-Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
-    Register ExtReg, MachineIRBuilder &MIB) const {
+Register AArch64InstructionSelector::narrowOrWidenScalarIfNeeded(
+    Register Reg, const TargetRegisterClass &RC,
+    MachineIRBuilder &MIB) const {
   MachineRegisterInfo &MRI = *MIB.getMRI();
-  if (MRI.getType(ExtReg).getSizeInBits() == 32)
-    return ExtReg;
-
-  // Insert a copy to move ExtReg to GPR32.
-  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+  auto Ty = MRI.getType(Reg);
+  assert(!Ty.isVector() && "Expected scalars only!");
+  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
+    return Reg;
 
-  // Select the copy into a subregister copy.
+  // Create a copy and immediately select it.
+  // FIXME: We should have an emitCopy function?
+  auto Copy = MIB.buildCopy({&RC}, {Reg});
   selectCopy(*Copy, TII, MRI, TRI, RBI);
   return Copy.getReg(0);
 }
 
-Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
-    Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
-  assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
-  MachineRegisterInfo &MRI = *MIB.getMRI();
-  unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
-  assert(WideSize >= NarrowSize &&
-         "WideSize cannot be smaller than NarrowSize!");
-
-  // If the sizes match, just return the register.
-  //
-  // If NarrowSize is an s1, then we can select it to any size, so we'll treat
-  // it as a don't care.
-  if (NarrowSize == WideSize || NarrowSize == 1)
-    return Reg;
-
-  // Now check the register classes.
-  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
-  const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
-  const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
-  assert(OrigRC && "Could not determine narrow RC?");
-  assert(WideRC && "Could not determine wide RC?");
-
-  // If the sizes differ, but the register classes are the same, there is no
-  // need to insert a SUBREG_TO_REG.
-  //
-  // For example, an s8 that's supposed to be a GPR will be selected to either
-  // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
-  // always end up on a GPR32.
-  if (OrigRC == WideRC)
-    return Reg;
-
-  // We have two different register classes. Insert a SUBREG_TO_REG.
-  unsigned SubReg = 0;
-  getSubRegForClass(OrigRC, TRI, SubReg);
-  assert(SubReg && "Couldn't determine subregister?");
-
-  // Build the SUBREG_TO_REG and return the new, widened register.
-  auto SubRegToReg =
-      MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
-          .addImm(0)
-          .addUse(Reg)
-          .addImm(SubReg);
-  constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
-  return SubRegToReg.getReg(0);
-}
-
 /// Select an "extended register" operand. This operand folds in an extend
 /// followed by an optional left shift.
 InstructionSelector::ComplexRendererFns
@@ -5768,7 +5723,7 @@
   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
   // copy.
   MachineIRBuilder MIB(*RootDef);
-  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+  ExtReg = narrowOrWidenScalarIfNeeded(ExtReg, AArch64::GPR32RegClass, MIB);
 
   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
            [=](MachineInstrBuilder &MIB) {
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -78,8 +78,9 @@
     ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
     ; CHECK: liveins: $h0
     ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
-    ; CHECK: %copy:gpr32 = COPY [[SUBREG_TO_REG]]
-    ; CHECK: TBNZW %copy, 3, %bb.1
+    ; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
+    ; CHECK: TBNZW [[COPY]], 3, %bb.1
     ; CHECK: B %bb.0
     ; CHECK: bb.1:
     ; CHECK: RET_ReallyLR
Index: llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
@@ -34,3 +34,35 @@
   bb.1:
     RET_ReallyLR
 ...
+---
+name:            no_trunc
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: no_trunc
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+  ; CHECK:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64all = COPY [[LDRQui]].dsub
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+  ; CHECK:   TBNZX [[COPY2]], 33, %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    liveins: $x0
+    %1:gpr(p0) = COPY $x0
+    %3:gpr(s64) = G_CONSTANT i64 8589934592
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
+    %2:fpr(s64) = G_TRUNC %0:fpr(s128)
+    %8:gpr(s64) = COPY %2:fpr(s64)
+    %4:gpr(s64) = G_AND %8:gpr, %3:gpr
+    %7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
+    %6:gpr(s1) = G_TRUNC %7:gpr(s32)
+    G_BRCOND %6:gpr(s1), %bb.1
+  bb.1:
+    RET_ReallyLR
Index: llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
@@ -106,8 +106,9 @@
     ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
     ; CHECK: liveins: $w0
    ; CHECK: %reg:gpr32all = COPY $w0
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
-    ; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: TBZX [[COPY]], 33, %bb.1
     ; CHECK: B %bb.0
     ; CHECK: bb.1:
     ; CHECK: RET_ReallyLR
@@ -140,8 +141,9 @@
     ; CHECK: bb.0:
     ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
     ; CHECK: %reg:gpr32 = IMPLICIT_DEF
-    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
-    ; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+    ; CHECK: TBZX [[COPY]], 33, %bb.1
     ; CHECK: B %bb.0
     ; CHECK: bb.1:
     ; CHECK: RET_ReallyLR