Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -188,7 +188,8 @@
   ///   {{AArch64::ADDXri, AArch64::ADDWri},
   ///    {AArch64::ADDXrs, AArch64::ADDWrs},
   ///    {AArch64::ADDXrr, AArch64::ADDWrr},
-  ///    {AArch64::SUBXri, AArch64::SUBWri}}};
+  ///    {AArch64::SUBXri, AArch64::SUBWri},
+  ///    {AArch64::ADDXrx, AArch64::ADDWrx}}};
   /// \endcode
   ///
   /// Each row in the table corresponds to a different addressing mode. Each
@@ -199,6 +200,7 @@
   /// - Row 1: The rs opcode variants
   /// - Row 2: The rr opcode variants
   /// - Row 3: The ri opcode variants for negative immediates
+  /// - Row 4: The rx opcode variants
   ///
   /// \attention Columns must be structured as follows:
   /// - Column 0: The 64-bit opcode variants
@@ -208,7 +210,7 @@
   /// \p LHS is the left-hand operand of the binop to emit.
   /// \p RHS is the right-hand operand of the binop to emit.
   MachineInstr *emitAddSub(
-      const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
+      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
       Register Dst, MachineOperand &LHS, MachineOperand &RHS,
       MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
@@ -3821,7 +3823,7 @@
 }
 
 MachineInstr *AArch64InstructionSelector::emitAddSub(
-    const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
+    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
     MachineIRBuilder &MIRBuilder) const {
   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3842,6 +3844,11 @@
     return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                      MIRBuilder, Fns);
 
+  // INSTRrx form.
+  if (auto Fns = selectArithExtendedRegister(RHS))
+    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
+                     MIRBuilder, Fns);
+
   // INSTRrs form.
   if (auto Fns = selectShiftedRegister(RHS))
     return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
@@ -3854,11 +3861,12 @@
 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
                                     MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 4> OpcTable{
+  const std::array<std::array<unsigned, 2>, 5> OpcTable{
       {{AArch64::ADDXri, AArch64::ADDWri},
        {AArch64::ADDXrs, AArch64::ADDWrs},
        {AArch64::ADDXrr, AArch64::ADDWrr},
-       {AArch64::SUBXri, AArch64::SUBWri}}};
+       {AArch64::SUBXri, AArch64::SUBWri},
+       {AArch64::ADDXrx, AArch64::ADDWrx}}};
   return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
 }
 
@@ -3866,11 +3874,12 @@
 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
                                      MachineOperand &RHS,
                                      MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 4> OpcTable{
+  const std::array<std::array<unsigned, 2>, 5> OpcTable{
       {{AArch64::ADDSXri, AArch64::ADDSWri},
        {AArch64::ADDSXrs, AArch64::ADDSWrs},
        {AArch64::ADDSXrr, AArch64::ADDSWrr},
-       {AArch64::SUBSXri, AArch64::SUBSWri}}};
+       {AArch64::SUBSXri, AArch64::SUBSWri},
+       {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
@@ -3878,11 +3887,12 @@
 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
                                      MachineOperand &RHS,
                                      MachineIRBuilder &MIRBuilder) const {
-  const std::array<std::array<unsigned, 2>, 4> OpcTable{
+  const std::array<std::array<unsigned, 2>, 5> OpcTable{
       {{AArch64::SUBSXri, AArch64::SUBSWri},
        {AArch64::SUBSXrs, AArch64::SUBSWrs},
        {AArch64::SUBSXrr, AArch64::SUBSWrr},
-       {AArch64::ADDSXri, AArch64::ADDSWri}}};
+       {AArch64::ADDSXri, AArch64::ADDSWri},
+       {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir
@@ -603,3 +603,36 @@
     %cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
     $w0 = COPY %cmp(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name: cmn_arith_extended_shl
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $x0, $x1
+    ; We should be able to fold away the extend + shift and select ADDSXrx.
+
+    ; CHECK-LABEL: name: cmn_arith_extended_shl
+    ; CHECK: liveins: $w0, $x0, $x1
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: %reg1:gpr32 = COPY $w0
+    ; CHECK: $xzr = ADDSXrx %reg0, %reg1, 50, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s64) = COPY $x0
+    %zero:gpr(s64) = G_CONSTANT i64 0
+    %sub:gpr(s64) = G_SUB %zero, %reg0
+
+    %reg1:gpr(s32) = COPY $w0
+    %ext:gpr(s64) = G_SEXT %reg1(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+
+    %cmp:gpr(s32) = G_ICMP intpred(ne), %sub(s64), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir
@@ -182,3 +182,91 @@
     %cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
     $w0 = COPY %cmp(s32)
     RET_ReallyLR implicit $w0
+...
+---
+name: cmp_arith_extended_s64
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $x1
+
+    ; CHECK-LABEL: name: cmp_arith_extended_s64
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr64sp = COPY $x1
+    ; CHECK: [[SUBSXrx:%[0-9]+]]:gpr64 = SUBSXrx %reg1, %reg0, 18, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s64) = COPY $x1
+    %ext:gpr(s64) = G_ZEXT %reg0(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: cmp_arith_extended_s32
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $w1, $h0
+
+    ; CHECK-LABEL: name: cmp_arith_extended_s32
+    ; CHECK: liveins: $w0, $w1, $h0
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
+    ; CHECK: %reg0:gpr32all = COPY [[SUBREG_TO_REG]]
+    ; CHECK: %reg1:gpr32sp = COPY $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %reg0
+    ; CHECK: [[SUBSWrx:%[0-9]+]]:gpr32 = SUBSWrx %reg1, [[COPY]], 10, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s16) = COPY $h0
+    %reg1:gpr(s32) = COPY $w1
+    %ext:gpr(s32) = G_ZEXT %reg0(s16)
+    %cst:gpr(s32) = G_CONSTANT i32 2
+    %shift:gpr(s32) = G_SHL %ext, %cst(s32)
+    %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s32), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: cmp_arith_extended_shl_too_large
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0, $x1
+
+    ; The constant on the G_SHL is > 4, so we won't select SUBSXrx
+
+    ; CHECK-LABEL: name: cmp_arith_extended_shl_too_large
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg0, %subreg.sub_32
+    ; CHECK: %ext:gpr64 = UBFMXri [[SUBREG_TO_REG]], 0, 31
+    ; CHECK: [[SUBSXrs:%[0-9]+]]:gpr64 = SUBSXrs %reg1, %ext, 5, implicit-def $nzcv
+    ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 9, implicit $nzcv
+    ; CHECK: $w0 = COPY %cmp
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s64) = COPY $x1
+    %ext:gpr(s64) = G_ZEXT %reg0(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 5
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %cmp:gpr(s32) = G_ICMP intpred(ugt), %reg1(s64), %shift
+    $w0 = COPY %cmp(s32)
+    RET_ReallyLR implicit $w0
+
+...
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir
@@ -89,3 +89,24 @@
     %2:gpr(p0) = G_PTR_ADD %0, %1(s64)
     $x0 = COPY %2(p0)
 ...
+---
+name: ptr_add_arith_extended
+legalized: true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; CHECK-LABEL: name: ptr_add_arith_extended
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %ptr:gpr64 = COPY $x1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY %ptr
+    ; CHECK: %ptr_add:gpr64sp = ADDXrx [[COPY]], %reg0, 18
+    ; CHECK: $x0 = COPY %ptr_add
+    %reg0:gpr(s32) = COPY $w0
+    %ptr:gpr(p0) = COPY $x1
+    %ext:gpr(s64) = G_ZEXT %reg0(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %ptr_add:gpr(p0) = G_PTR_ADD %ptr, %shift(s64)
+    $x0 = COPY %ptr_add(p0)
+...
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -136,3 +136,31 @@
     %add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
     $w0 = COPY %add(s32)
     RET_ReallyLR implicit $w0
+
+...
+---
+name: uaddo_arith_extended
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $x0
+    ; Check that we get ADDSXrx.
+    ; CHECK-LABEL: name: uaddo_arith_extended
+    ; CHECK: liveins: $w0, $x0
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: %reg1:gpr32 = COPY $w0
+    ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
+    ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $x0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s32) = COPY $w0
+    %ext:gpr(s64) = G_ZEXT %reg1(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %add:gpr(s64), %flags:gpr(s1) = G_UADDO %reg0, %shift
+    $x0 = COPY %add(s64)
+    RET_ReallyLR implicit $x0