diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -225,6 +225,10 @@
       const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
       Register Dst, MachineOperand &LHS, MachineOperand &RHS,
       MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitAdcSbc(const std::array<unsigned, 2> &SizeToOpcode,
+                           Register Dst, MachineOperand &LHS,
+                           MachineOperand &RHS,
+                           MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                         MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
@@ -232,6 +236,10 @@
                          MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                          MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+                         MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
@@ -261,13 +269,19 @@
   MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                 MachineIRBuilder &MIRBuilder) const;
 
+  /// Get the appropriate overflow condition code for \p Opcode.
+  ///
+  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
+  /// G_USUBO, etc.
+  AArch64CC::CondCode condCodeForOverflowOp(unsigned Opcode) const;
+
   /// Emit the overflow op for \p Opcode.
   ///
   /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
   /// G_USUBO, etc.
-  std::pair<MachineInstr *, AArch64CC::CondCode>
-  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
-                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitOverflowOp(unsigned Opcode, Register Dst,
+                               MachineOperand &LHS, MachineOperand &RHS,
+                               MachineIRBuilder &MIRBuilder) const;
 
   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
   /// \p IsNegative is true if the test should be "not zero".
@@ -411,6 +425,9 @@
   /// zero extended.
   bool isDef32(const MachineInstr &MI) const;
 
+  /// Returns true if \p MI is one of the carrying add/sub generic ops.
+  bool isCarryOp(const MachineInstr &MI) const;
+
   const AArch64TargetMachine &TM;
   const AArch64Subtarget &STI;
   const AArch64InstrInfo &TII;
@@ -2744,21 +2761,59 @@
   }
   case TargetOpcode::G_SADDO:
   case TargetOpcode::G_UADDO:
-  case TargetOpcode::G_SSUBO: {
-    // Emit the operation and get the correct condition code.
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_USUBO: {
     MachineIRBuilder MIRBuilder(I);
-    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
-                                  I.getOperand(2), I.getOperand(3), MIRBuilder);
+    emitOverflowOp(Opcode, I.getOperand(0).getReg(), I.getOperand(2),
+                   I.getOperand(3), MIRBuilder);
 
     // Now, put the overflow result in the register given by the first operand
     // to the overflow op. CSINC increments the result when the predicate is
     // false, so to get the increment when it's true, we need to use the
     // inverse. In this case, we want to increment when carry is set.
     Register ZReg = AArch64::WZR;
-    auto CsetMI = MIRBuilder
-                      .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
-                                  {ZReg, ZReg})
-                      .addImm(getInvertedCondCode(OpAndCC.second));
+    auto CsetMI =
+        MIRBuilder
+            .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+                        {ZReg, ZReg})
+            .addImm(getInvertedCondCode(condCodeForOverflowOp(Opcode)));
     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
     I.eraseFromParent();
     return true;
   }
+
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_USUBE: {
+    MachineIRBuilder MIRBuilder(I);
+
+    MachineOperand &CarryIn = I.getOperand(4);
+    MachineInstr *Def = MRI.getVRegDef(CarryIn.getReg());
+    // ADCS consumes the C flag as a carry, while SBCS consumes it as
+    // NOT-borrow. The flag is therefore directly usable only if we're
+    // immediately preceded by an unsigned carry op of the same add/sub kind.
+    // Otherwise we have to rematerialize the C flag from the carry-in vreg.
+    bool IsSub =
+        Opcode == TargetOpcode::G_SSUBE || Opcode == TargetOpcode::G_USUBE;
+    bool DefIsCarryOp = isCarryOp(*Def);
+    bool IsPredecessor = I.getPrevNode() == Def;
+    bool UsableCFlag = IsPredecessor && DefIsCarryOp &&
+                       condCodeForOverflowOp(Def->getOpcode()) ==
+                           (IsSub ? AArch64CC::LO : AArch64CC::HS);
+    if (!UsableCFlag) {
+      Register Dst = MRI.cloneVirtualRegister(CarryIn.getReg());
+      if (IsSub) {
+        // (0 - CarryIn) clears the C flag exactly when CarryIn is 1, which is
+        // the NOT-borrow form that SBCS expects.
+        Register ZReg = AArch64::WZR;
+        emitInstr(AArch64::SUBSWrr, {Dst}, {ZReg, CarryIn.getReg()},
+                  MIRBuilder);
+      } else {
+        // (CarryIn - 1) sets the C flag exactly when CarryIn is 1.
+        auto OneImm = select12BitValueWithLeftShift(1);
+        emitInstr(AArch64::SUBSWri, {Dst}, {CarryIn.getReg()}, MIRBuilder,
+                  OneImm);
+      }
+    }
+
+    emitOverflowOp(Opcode, I.getOperand(0).getReg(), I.getOperand(2),
+                   I.getOperand(3), MIRBuilder);
+
+    // Save the overflow result in a register as in the previous case.
+    Register ZReg = AArch64::WZR;
+    auto CsetMI =
+        MIRBuilder
+            .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
+                        {ZReg, ZReg})
+            .addImm(getInvertedCondCode(condCodeForOverflowOp(Opcode)));
     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
     I.eraseFromParent();
     return true;
@@ -4124,6 +4179,36 @@
   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
+MachineInstr *AArch64InstructionSelector::emitAdcSbc(
+    const std::array<unsigned, 2> &SizeToOpcode, Register Dst,
+    MachineOperand &LHS, MachineOperand &RHS,
+    MachineIRBuilder &MIRBuilder) const {
+  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
+  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+  auto Ty = MRI.getType(LHS.getReg());
+  assert(!Ty.isVector() && "Expected a scalar or pointer?");
+  unsigned Size = Ty.getSizeInBits();
+  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
+  bool Is32Bit = Size == 32;
+  return emitInstr(SizeToOpcode[Is32Bit], {Dst}, {LHS.getReg(), RHS.getReg()},
+                   MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
+                                     MachineOperand &RHS,
+                                     MachineIRBuilder &MIRBuilder) const {
+  // Index 0 holds the 64-bit opcode, index 1 the 32-bit one.
+  const std::array<unsigned, 2> Opcodes = {AArch64::ADCSXr, AArch64::ADCSWr};
+  return emitAdcSbc(Opcodes, Dst, LHS, RHS, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
+                                     MachineOperand &RHS,
+                                     MachineIRBuilder &MIRBuilder) const {
+  const std::array<unsigned, 2> Opcodes = {AArch64::SBCSXr, AArch64::SBCSWr};
+  return emitAdcSbc(Opcodes, Dst, LHS, RHS, MIRBuilder);
+}
+
 MachineInstr *
 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
@@ -4361,20 +4446,42 @@
   return &*I;
 }
 
-std::pair<MachineInstr *, AArch64CC::CondCode>
-AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
-                                           MachineOperand &LHS,
-                                           MachineOperand &RHS,
-                                           MachineIRBuilder &MIRBuilder) const {
+AArch64CC::CondCode
+AArch64InstructionSelector::condCodeForOverflowOp(unsigned Opcode) const {
   switch (Opcode) {
   default:
     llvm_unreachable("Unexpected opcode!");
   case TargetOpcode::G_SADDO:
-    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_SSUBE:
+    return AArch64CC::VS;
   case TargetOpcode::G_UADDO:
-    return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+  case TargetOpcode::G_UADDE:
+    return AArch64CC::HS;
+  case TargetOpcode::G_USUBO:
+  case TargetOpcode::G_USUBE:
+    // Unsigned subtractions wrap when a borrow occurs, i.e. when the C flag
+    // is clear.
+    return AArch64CC::LO;
+  }
+}
+
+MachineInstr *AArch64InstructionSelector::emitOverflowOp(
+    unsigned Opcode, Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+    MachineIRBuilder &MIRBuilder) const {
+  switch (Opcode) {
+  default:
+    llvm_unreachable("Unexpected opcode!");
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_UADDO:
+    return emitADDS(Dst, LHS, RHS, MIRBuilder);
   case TargetOpcode::G_SSUBO:
-    return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  case TargetOpcode::G_USUBO:
+    return emitSUBS(Dst, LHS, RHS, MIRBuilder);
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_UADDE:
+    return emitADCS(Dst, LHS, RHS, MIRBuilder);
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_USUBE:
+    return emitSBCS(Dst, LHS, RHS, MIRBuilder);
   }
 }
 
@@ -5920,6 +6027,21 @@
   }
 }
 
+bool AArch64InstructionSelector::isCarryOp(const MachineInstr &MI) const {
+  switch (MI.getOpcode()) {
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_USUBE:
+  case TargetOpcode::G_SADDO:
+  case TargetOpcode::G_UADDO:
+  case TargetOpcode::G_SSUBO:
+  case TargetOpcode::G_USUBO:
+    return true;
+  default:
+    return false;
+  }
+}
+
 // Perform fixups on the given PHI instruction's operands to force them all
 // to be the same as the destination regbank.
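
A sanity check on the flag rematerialization above: the following is a standalone sketch in plain C++ (not LLVM code; the helper name subsCarry is mine) that models how AArch64 SUBS computes the C flag. It confirms that (CarryIn - 1) reproduces C == CarryIn, the form ADCS consumes, while (0 - CarryIn) reproduces C == !CarryIn, the NOT-borrow form SBCS consumes.

// Standalone illustration, not part of the patch. On AArch64, SUBS sets
// C when the subtraction does not borrow, i.e. C = (n >= m) unsigned.
#include <cassert>
#include <cstdint>

static bool subsCarry(uint32_t n, uint32_t m) { return n >= m; }

int main() {
  for (uint32_t carry : {0u, 1u}) {
    // SUBS wzr, carry, #1 leaves C == carry: the form ADCS wants.
    assert(subsCarry(carry, 1) == (carry == 1));
    // SUBS wzr, wzr, carry leaves C == !carry: the form SBCS wants.
    assert(subsCarry(0, carry) == (carry == 0));
  }
  return 0;
}
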
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -165,7 +165,8 @@
 
   getActionDefinitionsBuilder({G_SMULH, G_UMULH}).legalFor({s32, s64});
 
-  getActionDefinitionsBuilder({G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO})
+  getActionDefinitionsBuilder(
+      {G_SADDE, G_SSUBE, G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_UADDO, G_USUBO})
       .legalFor({{s32, s1}, {s64, s1}})
       .minScalar(0, s32);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-arith-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-arith-128.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-arith-128.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O3 --mtriple aarch64-unknown-unknown -stop-after=legalizer --global-isel=1 %s -o - -verify-machineinstrs | FileCheck %s
+
+define void @add(i128* nocapture, i128, i128) {
+  ; CHECK-LABEL: name: add
+  ; CHECK: bb.1 (%ir-block.3):
+  ; CHECK:   liveins: $x0, $x2, $x3, $x4, $x5
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x3
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x4
+  ; CHECK:   [[COPY4:%[0-9]+]]:_(s64) = COPY $x5
+  ; CHECK:   [[UADDO:%[0-9]+]]:_(s64), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[COPY3]], [[COPY1]]
+  ; CHECK:   [[UADDE:%[0-9]+]]:_(s64), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[COPY4]], [[COPY2]], [[UADDO1]]
+  ; CHECK:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[UADDO]](s64), [[UADDE]](s64)
+  ; CHECK:   G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16 into %ir.0)
+  ; CHECK:   RET_ReallyLR
+  %4 = add nsw i128 %2, %1
+  store i128 %4, i128* %0, align 16
+  ret void
+}
+
+define void @sub(i128* nocapture, i128, i128) {
+  ; CHECK-LABEL: name: sub
+  ; CHECK: bb.1 (%ir-block.3):
+  ; CHECK:   liveins: $x0, $x2, $x3, $x4, $x5
+  ; CHECK:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x2
+  ; CHECK:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x3
+  ; CHECK:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x4
+  ; CHECK:   [[COPY4:%[0-9]+]]:_(s64) = COPY $x5
+  ; CHECK:   [[USUBO:%[0-9]+]]:_(s64), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[COPY3]], [[COPY1]]
+  ; CHECK:   [[USUBE:%[0-9]+]]:_(s64), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[COPY4]], [[COPY2]], [[USUBO1]]
+  ; CHECK:   [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[USUBO]](s64), [[USUBE]](s64)
+  ; CHECK:   G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16 into %ir.0)
+  ; CHECK:   RET_ReallyLR
+  %4 = sub nsw i128 %2, %1
+  store i128 %4, i128* %0, align 16
+  ret void
+}
+
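
The test above exercises the narrowing path: the legalizer splits a 128-bit add into a low-limb G_UADDO whose carry-out feeds a high-limb G_UADDE. As an illustration only (plain C++; add128 is a made-up name, not anything in the tree), the expansion computes the same thing as this sketch:

// Illustration only: limb-wise expansion of a 128-bit add, mirroring the
// G_UADDO/G_UADDE chain the legalizer emits for s128.
#include <cassert>
#include <cstdint>

static void add128(uint64_t a[2], const uint64_t b[2]) {
  uint64_t lo = a[0] + b[0];
  bool carry = lo < a[0];            // the G_UADDO overflow bit
  uint64_t hi = a[1] + b[1] + carry; // G_UADDE: add with carry-in
  a[0] = lo;
  a[1] = hi;
}

int main() {
  uint64_t a[2] = {~0ull, 0}; // 2^64 - 1
  uint64_t b[2] = {1, 0};
  add128(a, b);
  assert(a[0] == 0 && a[1] == 1); // carry propagated into the high limb
  return 0;
}
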
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -297,11 +297,13 @@
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_UADDE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_USUBO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_USUBE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
@@ -311,15 +313,16 @@
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_SADDE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_SSUBO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
 # DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_SSUBE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_UMULO (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. the first uncovered type index: 2, OK
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-add-carry.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-add-carry.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-add-carry.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            uadd_wide
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:            |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: uadd_wide
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK: %i0:gpr64 = COPY $x0
+    ; CHECK: %i1:gpr64 = COPY $x1
+    ; CHECK: %i2:gpr64 = COPY $x2
+    ; CHECK: %i3:gpr64 = COPY $x3
+    ; CHECK: %o0:gpr64 = ADDSXrr %i0, %i1, implicit-def $nzcv
+    ; CHECK: %c1:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: %o1:gpr64 = ADCSXr %i1, %i2, implicit-def $nzcv, implicit $nzcv
+    ; CHECK: %c2:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: %o2:gpr64 = ADCSXr %i2, %i3, implicit-def $nzcv, implicit $nzcv
+    ; CHECK: %c3:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $x0 = COPY %o0
+    ; CHECK: $x1 = COPY %o1
+    ; CHECK: $x2 = COPY %o2
+    %i0:gpr(s64) = COPY $x0
+    %i1:gpr(s64) = COPY $x1
+    %i2:gpr(s64) = COPY $x2
+    %i3:gpr(s64) = COPY $x3
+    %o0:gpr(s64), %c1:gpr(s1) = G_UADDO %i0, %i1
+    %o1:gpr(s64), %c2:gpr(s1) = G_UADDE %i1, %i2, %c1
+    %o2:gpr(s64), %c3:gpr(s1) = G_UADDE %i2, %i3, %c2
+    $x0 = COPY %o0
+    $x1 = COPY %o1
+    $x2 = COPY %o2
+
+...
+---
+name:            uadde_interrupted
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:            |
+  bb.1:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: uadde_interrupted
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %i0:gpr64 = COPY $x0
+    ; CHECK: %i1:gpr64 = COPY $x1
+    ; CHECK: %i2:gpr64 = COPY $x2
+    ; CHECK: %o0:gpr64 = ADDSXrr %i0, %i1, implicit-def $nzcv
+    ; CHECK: %c0:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: %rude:gpr64 = ADDXrr %i0, %i2
+    ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri %c0, 1, 0, implicit-def $nzcv
+    ; CHECK: %o1:gpr64 = ADCSXr %i1, %i2, implicit-def $nzcv, implicit $nzcv
+    ; CHECK: %c1:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $x0 = COPY %o0
+    ; CHECK: $x1 = COPY %o1
+    ; CHECK: $x2 = COPY %rude
+    %i0:gpr(s64) = COPY $x0
+    %i1:gpr(s64) = COPY $x1
+    %i2:gpr(s64) = COPY $x2
+    %o0:gpr(s64), %c0:gpr(s1) = G_UADDO %i0, %i1
+    %rude:gpr(s64) = G_ADD %i0, %i2
+    %o1:gpr(s64), %c1:gpr(s1) = G_UADDE %i1, %i2, %c0
+    $x0 = COPY %o0
+    $x1 = COPY %o1
+    $x2 = COPY %rude
+
+...
+---
+name:            uadde_from_saddo
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:            |
+  bb.1:
+    liveins: $x0, $x1, $x2
+
+    ; CHECK-LABEL: name: uadde_from_saddo
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %i0:gpr64 = COPY $x0
+    ; CHECK: %i1:gpr64 = COPY $x1
+    ; CHECK: %i2:gpr64 = COPY $x2
+    ; CHECK: %o0:gpr64 = ADDSXrr %i0, %i1, implicit-def $nzcv
+    ; CHECK: %c0:gpr32common = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri %c0, 1, 0, implicit-def $nzcv
+    ; CHECK: %o1:gpr64 = ADCSXr %i1, %i2, implicit-def $nzcv, implicit $nzcv
+    ; CHECK: %c1:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK: $x0 = COPY %o0
+    ; CHECK: $x1 = COPY %o1
+    %i0:gpr(s64) = COPY $x0
+    %i1:gpr(s64) = COPY $x1
+    %i2:gpr(s64) = COPY $x2
+    %o0:gpr(s64), %c0:gpr(s1) = G_SADDO %i0, %i1
+    %o1:gpr(s64), %c1:gpr(s1) = G_UADDE %i1, %i2, %c0
+    $x0 = COPY %o0
+    $x1 = COPY %o1
+
+...
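
One last standalone illustration (plain C++; csinc is my own model, not an LLVM API) of why the CSINCWr lines in the tests above materialize the overflow bit: CSINC Wd, Wn, Wm, cond selects Wn when cond holds and Wm + 1 otherwise, so with both sources wired to WZR and the inverted condition code, the result is exactly 1 when the overflow condition holds.

// Illustration only: a model of AArch64 CSINC semantics, showing why
// CSINC Wd, WZR, WZR, inv(cc) yields the overflow bit computed by cc.
#include <cassert>
#include <cstdint>

static uint32_t csinc(uint32_t n, uint32_t m, bool cond) {
  return cond ? n : m + 1; // select n if cond holds, else m + 1
}

int main() {
  for (bool overflow : {false, true}) {
    bool cc = overflow; // cc (HS, LO, or VS above) holds iff overflow
    // The selector passes the *inverted* code, so the increment fires
    // exactly when the original condition holds.
    assert(csinc(0, 0, !cc) == (overflow ? 1u : 0u));
  }
  return 0;
}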