diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -258,6 +258,14 @@
   MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                 MachineIRBuilder &MIRBuilder) const;
 
+  /// Emit the flag-setting op for the overflow op \p Opcode into \p Dst.
+  ///
+  /// \p Opcode must be G_SADDO, G_UADDO or G_SSUBO. \returns the emitted
+  /// instruction and the condition code that is true on overflow.
+  std::pair<MachineInstr *, AArch64CC::CondCode>
+  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
+                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+
   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
   /// \p IsNegative is true if the test should be "not zero".
   /// This will also optimize the test bit instruction when possible.
@@ -2672,35 +2680,23 @@
     I.eraseFromParent();
     return true;
   }
-  case TargetOpcode::G_UADDO: {
-    // TODO: Support other types.
-    unsigned OpSize = Ty.getSizeInBits();
-    if (OpSize != 32 && OpSize != 64) {
-      LLVM_DEBUG(
-          dbgs()
-          << "G_UADDO currently only supported for 32 and 64 b types.\n");
-      return false;
-    }
-
-    // TODO: Support vectors.
-    if (Ty.isVector()) {
-      LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
-      return false;
-    }
-
-    // Add and set the set condition flag.
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_SSUBO: {
+    // Emit the operation and get the correct condition code.
     MachineIRBuilder MIRBuilder(I);
-    emitADDS(I.getOperand(0).getReg(), I.getOperand(2), I.getOperand(3),
-             MIRBuilder);
+    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
+                                  I.getOperand(2), I.getOperand(3), MIRBuilder);
 
     // Now, put the overflow result in the register given by the first operand
-    // to the G_UADDO. CSINC increments the result when the predicate is false,
-    // so to get the increment when it's true, we need to use the inverse. In
-    // this case, we want to increment when carry is set.
+    // to the overflow op. CSINC increments the result when the predicate is
+    // false, so to get the increment when it's true, we need to use the
+    // inverse of the condition code returned for this op.
+    Register ZReg = AArch64::WZR;
     auto CsetMI = MIRBuilder
                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
-                                  {Register(AArch64::WZR), Register(AArch64::WZR)})
-                      .addImm(getInvertedCondCode(AArch64CC::HS));
+                                  {ZReg, ZReg})
+                      .addImm(getInvertedCondCode(OpAndCC.second));
     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
     I.eraseFromParent();
     return true;
@@ -4287,6 +4283,26 @@
   return &*I;
 }
 
+std::pair<MachineInstr *, AArch64CC::CondCode>
+AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
+                                           MachineOperand &LHS,
+                                           MachineOperand &RHS,
+                                           MachineIRBuilder &MIRBuilder) const {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  case TargetOpcode::G_SADDO:
+    // Signed overflow is signalled by the V flag (VS = overflow set).
+    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  case TargetOpcode::G_UADDO:
+    // An unsigned add overflows exactly when it carries out (HS = carry set).
+    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+  case TargetOpcode::G_SSUBO:
+    // Signed overflow is signalled by the V flag (VS = overflow set).
+    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  }
+}
+
 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
   MachineIRBuilder MIB(I);
   MachineRegisterInfo &MRI = *MIB.getMRI();
diff --git a/llvm/lib/Target/AArch64/GISel/select-saddo.mir b/llvm/lib/Target/AArch64/GISel/select-saddo.mir
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/GISel/select-saddo.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name: saddo_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that a 32-bit G_SADDO selects ADDSWrr followed by a CSINCWr.
+    ; CHECK-LABEL: name: saddo_s32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %saddo:gpr32 = ADDSWrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %saddo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %saddo:gpr(s32), %4:gpr(s1) = G_SADDO %reg0, %reg1
+    $w0 = COPY %saddo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1, $x2
+    ; Check that a 64-bit G_SADDO selects ADDSXrr followed by a CSINCWr.
+    ; CHECK-LABEL: name: saddo_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %saddo:gpr64 = ADDSXrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $x0 = COPY %saddo
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s64) = COPY $x1
+    %saddo:gpr(s64), %4:gpr(s1) = G_SADDO %reg0, %reg1
+    $x0 = COPY %saddo(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: saddo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get ADDSWri when we can fold in a constant.
+    ;
+    ; CHECK-LABEL: name: saddo_s32_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %saddo:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %saddo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %saddo:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
+    $w0 = COPY %saddo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s32_shifted
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get ADDSWrs when we can fold in a shift.
+    ;
+    ; CHECK-LABEL: name: saddo_s32_shifted
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %add:gpr32 = ADDSWrs %reg0, %reg1, 16, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
+    %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %reg0, %shift
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_s32_neg_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get SUBSWri when we can fold in a negative constant.
+    ;
+    ; CHECK-LABEL: name: saddo_s32_neg_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 -16
+    %add:gpr(s32), %overflow:gpr(s1) = G_SADDO %copy, %constant
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: saddo_arith_extended
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $x0
+    ; Check that we get ADDSXrx, folding the G_ZEXT and the shift by 2.
+    ; CHECK-LABEL: name: saddo_arith_extended
+    ; CHECK: liveins: $w0, $x0
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: %reg1:gpr32 = COPY $w0
+    ; CHECK: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
+    ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $x0 = COPY %add
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s32) = COPY $w0
+    %ext:gpr(s64) = G_ZEXT %reg1(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %add:gpr(s64), %flags:gpr(s1) = G_SADDO %reg0, %shift
+    $x0 = COPY %add(s64)
+    RET_ReallyLR implicit $x0
diff --git a/llvm/lib/Target/AArch64/GISel/select-ssubo.mir b/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/GISel/select-ssubo.mir
@@ -0,0 +1,158 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name: ssubo_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that a 32-bit G_SSUBO selects SUBSWrr followed by a CSINCWr.
+    ; CHECK-LABEL: name: ssubo_s32
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %ssubo:gpr32 = SUBSWrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %ssubo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %ssubo:gpr(s32), %4:gpr(s1) = G_SSUBO %reg0, %reg1
+    $w0 = COPY %ssubo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1, $x2
+    ; Check that a 64-bit G_SSUBO selects SUBSXrr followed by a CSINCWr.
+    ; CHECK-LABEL: name: ssubo_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %ssubo:gpr64 = SUBSXrr %reg0, %reg1, implicit-def $nzcv
+    ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $x0 = COPY %ssubo
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s64) = COPY $x1
+    %ssubo:gpr(s64), %4:gpr(s1) = G_SSUBO %reg0, %reg1
+    $x0 = COPY %ssubo(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: ssubo_s32_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get SUBSWri when we can fold in a constant.
+    ;
+    ; CHECK-LABEL: name: ssubo_s32_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %ssubo:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %ssubo
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %ssubo:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
+    $w0 = COPY %ssubo(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s32_shifted
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get SUBSWrs when we can fold in a shift.
+    ;
+    ; CHECK-LABEL: name: ssubo_s32_shifted
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %sub:gpr32 = SUBSWrs %reg0, %reg1, 16, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %sub
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %constant:gpr(s32) = G_CONSTANT i32 16
+    %shift:gpr(s32) = G_SHL %reg1(s32), %constant(s32)
+    %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %reg0, %shift
+    $w0 = COPY %sub(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_s32_neg_imm
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+    ; Check that we get ADDSWri when we can fold in a negative constant.
+    ;
+    ; CHECK-LABEL: name: ssubo_s32_neg_imm
+    ; CHECK: liveins: $w0, $w1, $x2
+    ; CHECK: %copy:gpr32sp = COPY $w0
+    ; CHECK: %sub:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
+    ; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $w0 = COPY %sub
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %constant:gpr(s32) = G_CONSTANT i32 -16
+    %sub:gpr(s32), %overflow:gpr(s1) = G_SSUBO %copy, %constant
+    $w0 = COPY %sub(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: ssubo_arith_extended
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $w0, $x0
+    ; Check that we get SUBSXrx, folding the G_ZEXT and the shift by 2.
+    ; CHECK-LABEL: name: ssubo_arith_extended
+    ; CHECK: liveins: $w0, $x0
+    ; CHECK: %reg0:gpr64sp = COPY $x0
+    ; CHECK: %reg1:gpr32 = COPY $w0
+    ; CHECK: %sub:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
+    ; CHECK: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: $x0 = COPY %sub
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s32) = COPY $w0
+    %ext:gpr(s64) = G_ZEXT %reg1(s32)
+    %cst:gpr(s64) = G_CONSTANT i64 2
+    %shift:gpr(s64) = G_SHL %ext, %cst(s64)
+    %sub:gpr(s64), %flags:gpr(s1) = G_SSUBO %reg0, %shift
+    $x0 = COPY %sub(s64)
+    RET_ReallyLR implicit $x0