diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -269,6 +269,10 @@ MachineIRBuilder &MIRBuilder) const; MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; + MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const; MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, @@ -289,6 +293,13 @@ MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const; + /// Emit an instruction that sets NZCV to the carry-in expected by \p I. + /// Might elide the instruction if the previous instruction already sets NZCV + /// correctly. + MachineInstr *emitCarryIn(MachineInstr &I, MachineOperand &CarrySrc); + + bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI); + /// Emit the overflow op for \p Opcode. /// /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, @@ -3081,24 +3092,16 @@ I.eraseFromParent(); return true; } + + case TargetOpcode::G_SADDE: + case TargetOpcode::G_UADDE: + case TargetOpcode::G_SSUBE: + case TargetOpcode::G_USUBE: case TargetOpcode::G_SADDO: case TargetOpcode::G_UADDO: case TargetOpcode::G_SSUBO: - case TargetOpcode::G_USUBO: { - // Emit the operation and get the correct condition code. - auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), - I.getOperand(2), I.getOperand(3), MIB); - - // Now, put the overflow result in the register given by the first operand - // to the overflow op. CSINC increments the result when the predicate is - // false, so to get the increment when it's true, we need to use the - // inverse. In this case, we want to increment when carry is set. 
- Register ZReg = AArch64::WZR; - emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg, - getInvertedCondCode(OpAndCC.second), MIB); - I.eraseFromParent(); - return true; - } + case TargetOpcode::G_USUBO: + return selectOverflowOp(I, MRI); case TargetOpcode::G_PTRMASK: { Register MaskReg = I.getOperand(2).getReg(); @@ -4569,6 +4572,28 @@ return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); } +MachineInstr * +AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); + static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr}; + return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder); +} + +MachineInstr * +AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, + MachineOperand &RHS, + MachineIRBuilder &MIRBuilder) const { + assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); + MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); + bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); + static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr}; + return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder); +} + MachineInstr * AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const { @@ -4775,6 +4800,107 @@ return &*CSINC; } +MachineInstr * +AArch64InstructionSelector::emitCarryIn(MachineInstr &I, + MachineOperand &CarrySrc) { + assert(CarrySrc.isReg() && "Expected register operand?"); + MachineRegisterInfo &MRI = MIB.getMF().getRegInfo(); + + Register CarryReg = CarrySrc.getReg(); + unsigned Opcode = I.getOpcode(); + + bool OptAdd = false; + bool OptSub = false; + bool NegateCarry = false; + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SADDE: + case TargetOpcode::G_UADDE: + OptAdd = true; + break; + case TargetOpcode::G_SSUBE: + case TargetOpcode::G_USUBE: + OptSub = true; + NegateCarry = true; + break; + } + + // If the previous instruction will already produce the correct carry, do not + // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences + // generated during legalization of wide add/sub. This optimization depends on + // these sequences not being interrupted by other instructions. 
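For context, the adjacency check that follows can be restated as a standalone predicate. This is a minimal sketch only, not part of the patch; the helper name is hypothetical and it assumes the same MachineIR APIs already used in this file:

// Sketch: true if CarryReg was produced by the instruction immediately
// preceding I, and that producer is an unsigned overflow op whose selected
// form (ADDS/ADCS or SUBS/SBCS) already leaves the needed carry in NZCV.
static bool carryAlreadyInNZCV(const MachineInstr &I, Register CarryReg,
                               const MachineRegisterInfo &MRI, bool IsAdd) {
  const MachineInstr *Def = MRI.getVRegDef(CarryReg);
  if (!Def || Def != I.getPrevNode())
    return false; // Any intervening instruction might clobber NZCV.
  unsigned Op = Def->getOpcode();
  if (IsAdd)
    return Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_UADDO;
  return Op == TargetOpcode::G_USUBE || Op == TargetOpcode::G_USUBO;
}

When this predicate holds, the code below returns nullptr and no carry-setting SUBS is emitted.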
+ MachineInstr *CarryMI = MRI.getVRegDef(CarryReg); + if (CarryMI && CarryMI == I.getPrevNode()) { + unsigned CarryOp = CarryMI->getOpcode(); + if (OptAdd && + (CarryOp == TargetOpcode::G_UADDE || CarryOp == TargetOpcode::G_UADDO)) + return nullptr; + if (OptSub && + (CarryOp == TargetOpcode::G_USUBE || CarryOp == TargetOpcode::G_USUBO)) + return nullptr; + } + + Register DeadReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); + + if (NegateCarry) { + // (0 - Carry) sets !C in NZCV when Carry == 1 + Register ZReg = AArch64::WZR; + return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB); + } + + // (Carry - 1) sets !C in NZCV when Carry == 0 + auto Fns = select12BitValueWithLeftShift(1); + return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns); +} + +bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I, + MachineRegisterInfo &MRI) { + unsigned Opcode = I.getOpcode(); + bool CarryIn = false; + + switch (Opcode) { + default: + llvm_unreachable("Unexpected opcode!"); + case TargetOpcode::G_SADDO: + case TargetOpcode::G_UADDO: + case TargetOpcode::G_SSUBO: + case TargetOpcode::G_USUBO: + break; + case TargetOpcode::G_SADDE: + case TargetOpcode::G_UADDE: + case TargetOpcode::G_SSUBE: + case TargetOpcode::G_USUBE: + CarryIn = true; + break; + } + + if (CarryIn) { + // Set NZCV carry according to carry-in VReg + emitCarryIn(I, I.getOperand(4)); + } + + // Emit the operation and get the correct condition code. + auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), + I.getOperand(2), I.getOperand(3), MIB); + + Register CarryOutReg = I.getOperand(1).getReg(); + + // Don't convert carry-out to VReg if it is never used + if (!MRI.hasAtMostUserInstrs(CarryOutReg, 0)) { + // Now, put the overflow result in the register given by the first operand + // to the overflow op. CSINC increments the result when the predicate is + // false, so to get the increment when it's true, we need to use the + // inverse. In this case, we want to increment when carry is set. 
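To make the inversion above concrete, here is an illustrative sketch (not part of the patch; register names are arbitrary and the post-selection MIR is abbreviated, with the condition shown symbolically):

// With both sources tied to WZR, CSINC computes (cond ? 0 : 0 + 1), i.e.
// (cond ? 0 : 1). Passing the *inverted* condition of the overflow op
// therefore yields 1 exactly when the carry/overflow is set. For a 32-bit
// G_UADDO whose carry-out is used, the selected sequence looks roughly like:
//   ADDSWrr %sum, %a, %b          ; sets NZCV.C on unsigned wrap (HS)
//   CSINCWr %carry, wzr, wzr, lo  ; lo = inverted hs, so %carry = C ? 1 : 0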
+ Register ZReg = AArch64::WZR; + emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg, + getInvertedCondCode(OpAndCC.second), MIB); + } + + I.eraseFromParent(); + return true; +} + std::pair AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, @@ -4791,6 +4917,14 @@ return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); case TargetOpcode::G_USUBO: return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); + case TargetOpcode::G_SADDE: + return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_UADDE: + return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); + case TargetOpcode::G_SSUBE: + return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); + case TargetOpcode::G_USUBE: + return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); } } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp @@ -77,6 +77,24 @@ return AArch64::SUBXri; case AArch64::SUBSWri: return AArch64::SUBWri; + case AArch64::ADDSXrr: + return AArch64::ADDXrr; + case AArch64::ADDSWrr: + return AArch64::ADDWrr; + case AArch64::ADDSXrs: + return AArch64::ADDXrs; + case AArch64::ADDSXri: + return AArch64::ADDXri; + case AArch64::ADDSWri: + return AArch64::ADDWri; + case AArch64::SBCSXr: + return AArch64::SBCXr; + case AArch64::SBCSWr: + return AArch64::SBCWr; + case AArch64::ADCSXr: + return AArch64::ADCXr; + case AArch64::ADCSWr: + return AArch64::ADCWr; } } @@ -137,6 +155,12 @@ } bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) { + // If we find a dead NZCV implicit-def, we + // - try to convert the operation to a non-flag-setting equivalent + // - or mark the def as dead to aid later peephole optimizations. + + // Use cases: + // 1) // Consider the following code: // FCMPSrr %0, %1, implicit-def $nzcv // %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv @@ -153,8 +177,11 @@ // in between the two FCMPs. In this case, the SUBS defines NZCV // but it doesn't have any users, being overwritten by the second FCMP. // - // Our solution here is to try to convert flag setting operations between - // a interval of identical FCMPs, so that CSE will be able to eliminate one. + // 2) + // The instruction selector always emits the flag-setting variant of ADC/SBC + // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these + // instructions is never used, we can switch to the non-flag-setting variant. + bool Changed = false; auto &MF = *MBB.getParent(); auto &Subtarget = MF.getSubtarget(); @@ -163,52 +190,19 @@ auto RBI = Subtarget.getRegBankInfo(); auto &MRI = MF.getRegInfo(); - // The first step is to find the first and last FCMPs. If we have found - // at least two, then set the limit of the bottom-up walk to the first FCMP - // found since we're only interested in dealing with instructions between - // them. 
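A condensed sketch of the decision made in the rewrite loop further below (hypothetical helper, not part of the patch; it reuses getNonFlagSettingVariant from this file and assumes NZCV has already been shown to be dead at the def):

// If MI defines NZCV and a non-flag-setting twin exists, demote it
// (e.g. ADCSXr -> ADCXr, SBCSWr -> SBCWr); otherwise just mark the NZCV
// def as dead so later peephole optimizations can rely on that fact.
static bool demoteDeadNZCVDef(MachineInstr &MI, const TargetInstrInfo &TII) {
  int NZCVIdx = MI.findRegisterDefOperandIdx(AArch64::NZCV);
  if (NZCVIdx == -1)
    return false;
  if (unsigned NewOpc = getNonFlagSettingVariant(MI.getOpcode())) {
    MI.setDesc(TII.get(NewOpc));
    MI.removeOperand(NZCVIdx);
    return true;
  }
  MI.getOperand(NZCVIdx).setIsDead();
  return false;
}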
- MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr; - for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) { - if (MI.getOpcode() == AArch64::FCMPSrr || - MI.getOpcode() == AArch64::FCMPDrr) { - if (!FirstCmp) - FirstCmp = &MI; - else - LastCmp = &MI; - } - } - - // In addition to converting flag-setting ops in fcmp ranges into non-flag - // setting ops, across the whole basic block we also detect when nzcv - // implicit-defs are dead, and mark them as dead. Peephole optimizations need - // this information later. - LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo()); LRU.addLiveOuts(MBB); bool NZCVDead = LRU.available(AArch64::NZCV); - bool InsideCmpRange = false; for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { - LRU.stepBackward(II); - - if (LastCmp) { // There's a range present in this block. - // If we're inside an fcmp range, look for begin instruction. - if (InsideCmpRange && &II == FirstCmp) - InsideCmpRange = false; - else if (&II == LastCmp) - InsideCmpRange = true; - } - - // Did this instruction define NZCV? - bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV); - if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) { - // If we have a def and NZCV is dead, then we may convert this op. + if (NZCVDead && II.definesRegister(AArch64::NZCV)) { + // The instruction defines NZCV, but NZCV is dead. unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode()); int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV); if (DeadNZCVIdx != -1) { - // If we're inside an fcmp range, then convert flag setting ops. - if (InsideCmpRange && NewOpc) { + if (NewOpc) { + // If there is an equivalent non-flag-setting op, we convert. LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting " - "op in fcmp range: " + "op: " << II); II.setDesc(TII->get(NewOpc)); II.removeOperand(DeadNZCVIdx); @@ -226,7 +220,8 @@ } } - NZCVDead = NZCVDeadAtCurrInstr; + LRU.stepBackward(II); + NZCVDead = LRU.available(AArch64::NZCV); } return Changed; } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll @@ -925,14 +925,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, 
[x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -1285,7 +1305,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1675,14 +1705,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] 
+; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -2035,7 +2080,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll @@ -510,10 +510,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] @@ -527,10 +531,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 
+; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] @@ -544,10 +552,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] @@ -561,10 +573,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] @@ -578,10 +594,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] @@ -810,7 +830,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -823,7 +845,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -836,7 +860,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -849,7 +875,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -862,7 +890,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; 
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1015,10 +1045,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] @@ -1032,10 +1065,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] @@ -1049,10 +1085,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] @@ -1066,10 +1105,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] @@ -1083,10 +1125,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] @@ -1315,7 +1360,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1328,7 +1374,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1341,7 +1388,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1354,7 +1402,8 @@ define dso_local i128 
@atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1367,7 +1416,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll @@ -555,10 +555,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x2, x0, x9 -; -O0: bl __aarch64_cas16_relax -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -570,10 +578,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x2, x0, x9 -; -O0: bl __aarch64_cas16_acq -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -585,10 +601,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x2, x0, x9 -; -O0: bl __aarch64_cas16_rel -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -600,10 +624,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x2, x0, x9 -; -O0: bl __aarch64_cas16_acq_rel -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -615,10 +647,18 @@ 
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x2, x0, x9 -; -O0: bl __aarch64_cas16_acq_rel -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -845,7 +885,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -858,7 +900,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -871,7 +915,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -884,7 +930,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -897,7 +945,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1130,10 +1180,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x2, x0, x9 -; -O0: bl __aarch64_cas16_relax -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -1145,10 +1202,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x2, x0, x9 -; -O0: bl __aarch64_cas16_acq -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 
; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -1160,10 +1224,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x2, x0, x9 -; -O0: bl __aarch64_cas16_rel -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -1175,10 +1246,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x2, x0, x9 -; -O0: bl __aarch64_cas16_acq_rel -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1190,10 +1268,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x2, x0, x9 -; -O0: bl __aarch64_cas16_acq_rel -; -O0: subs x10, x10, x11 -; -O0: ccmp x8, x9, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -1440,7 +1525,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1453,7 +1539,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1466,7 +1553,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1479,7 +1567,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1492,7 +1581,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll @@ -925,14 +925,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, 
x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -1285,7 +1305,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1675,14 +1705,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: 
stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -2035,7 +2080,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: 
atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll @@ -925,14 +925,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, 
[x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -1285,7 +1305,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1675,14 +1705,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -1694,14 
+1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -2035,7 +2080,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_sub_i128_unaligned_acquire: @@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll @@ -545,10 +545,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] @@ -562,10 +566,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] @@ -579,10 +587,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] @@ -596,10 +608,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] @@ -613,10 +629,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x2, x10, x12 +; -O0: adds x2, x9, 
x11 +; -O0: and w11, w9, #0x1 +; -O0: subs w11, w11, #1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] @@ -845,7 +865,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -858,7 +880,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -871,7 +895,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -884,7 +910,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -897,7 +925,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1050,10 +1080,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: casp x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] @@ -1067,10 +1100,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspa x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] @@ -1084,10 +1120,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspl x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] @@ -1101,10 
+1140,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] @@ -1118,10 +1160,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x2, x10, x12 +; -O0: subs x2, x9, x11 +; -O0: and w11, w9, #0x1 ; -O0: caspal x0, x1, x2, x3, [x8] -; -O0: subs x11, x8, x11 -; -O0: ccmp x9, x10, #0, eq +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] @@ -1350,7 +1395,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -1363,7 +1409,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -1376,7 +1423,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -1389,7 +1437,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -1402,7 +1451,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll @@ -925,14 +925,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; 
-O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_release: -; -O0: adds x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] @@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst: -; -O0: adds x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: adds x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: subs w10, w10, #1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -1285,7 +1305,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: @@ -1298,7 +1320,9 @@ define dso_local i128 
@atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: @@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_release: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: @@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: @@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst: -; -O0: adds x8, x11, x8 +; -O0: adds x9, x8, x9 +; -O0: and w11, w8, #0x1 +; -O0: subs w11, w11, #1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: @@ -1675,14 +1705,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] @@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stxp w8, x14, x15, [x9] -; -O0: stxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stxp w8, x14, x15, [x11] +; -O0: stxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] @@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_release: -; -O0: subs x14, x11, x10 -; -O0: ldxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldxp x0, x1, 
[x8] @@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] @@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst: -; -O0: subs x14, x11, x10 -; -O0: ldaxp x10, x12, [x9] -; -O0: cmp x10, x11 -; -O0: cmp x12, x13 -; -O0: stlxp w8, x14, x15, [x9] -; -O0: stlxp w8, x10, x12, [x9] -; -O0: subs x12, x12, x13 -; -O0: ccmp x10, x11, #0, eq +; -O0: subs x14, x8, x10 +; -O0: and w10, w8, #0x1 +; -O0: ldaxp x10, x9, [x11] +; -O0: cmp x10, x12 +; -O0: cmp x9, x13 +; -O0: stlxp w8, x14, x15, [x11] +; -O0: stlxp w8, x10, x9, [x11] +; -O0: eor x8, x10, x8 +; -O0: eor x11, x9, x11 +; -O0: orr x8, x8, x11 +; -O0: subs x8, x8, #0 ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] @@ -2035,7 +2080,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: @@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: @@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: @@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: @@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) { ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: -; -O0: subs x8, x11, x8 +; -O0: subs x9, x8, x9 +; -O0: and w11, w8, #0x1 ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir deleted file mode 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir +++ /dev/null @@ -1,181 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize 
-verify-machineinstrs %s -o - | FileCheck %s ---- -name: test_fcmp_dead_cc -alignment: 4 -legalized: true -regBankSelected: true -selected: true -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$w1' } -body: | - bb.1: - liveins: $w1, $x0, $s0, $s1 - - ; CHECK-LABEL: name: test_fcmp_dead_cc - ; CHECK: liveins: $w1, $x0, $s0, $s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr - ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 - %1:gpr64 = COPY $x0 - %2:gpr32 = COPY $w1 - %3:fpr32 = COPY $s0 - %4:fpr32 = COPY $s1 - %26:gpr32 = COPY $wzr - FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr - %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr - %14:gpr32common = UBFMWri %12, 1, 31 - %60:gpr32 = MOVi32imm 1 - %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv - $w0 = COPY %16 - RET_ReallyLR implicit $w0 - -... ---- -name: test_fcmp_64_dead_cc -alignment: 4 -legalized: true -regBankSelected: true -selected: true -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$w1' } -body: | - bb.1: - liveins: $w1, $x0, $d0, $d1 - - ; CHECK-LABEL: name: test_fcmp_64_dead_cc - ; CHECK: liveins: $w1, $x0, $d0, $d1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr - ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 - %1:gpr64 = COPY $x0 - %2:gpr32 = COPY $w1 - %3:fpr64 = COPY $d0 - %4:fpr64 = COPY $d1 - %26:gpr32 = COPY $wzr - FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr - %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr - %14:gpr32common = UBFMWri %12, 1, 31 - %60:gpr32 = MOVi32imm 1 - %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv - $w0 = COPY %16 - RET_ReallyLR implicit $w0 - -... 
---- -name: test_fcmp_dead_cc_3_fcmps -alignment: 4 -legalized: true -regBankSelected: true -selected: true -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$w1' } -body: | - bb.1: - liveins: $w1, $x0, $s0, $s1 - - ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps - ; CHECK: liveins: $w1, $x0, $s0, $s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr - ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr - ; CHECK: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 - %1:gpr64 = COPY $x0 - %2:gpr32 = COPY $w1 - %3:fpr32 = COPY $s0 - %4:fpr32 = COPY $s1 - %26:gpr32 = COPY $wzr - FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr - %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr - %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr - %14:gpr32common = UBFMWri %12, 1, 31 - %60:gpr32 = MOVi32imm 1 - %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv - $w0 = COPY %16 - RET_ReallyLR implicit $w0 - -... ---- -name: test_impdef_made_dead -alignment: 4 -legalized: true -regBankSelected: true -selected: true -tracksRegLiveness: true -liveins: - - { reg: '$x0' } - - { reg: '$w1' } -body: | - bb.1: - liveins: $w1, $x0, $s0, $s1 - ; Check that any dead imp-defs of NZCV are marked as such. - ; CHECK-LABEL: name: test_impdef_made_dead - ; CHECK: liveins: $w1, $x0, $s0, $s1 - ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 - ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 - ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr - ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY4]], implicit-def dead $nzcv - ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr - ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31 - ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 - ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv - ; CHECK: %ret:gpr32 = SUBSWrr [[CSELWr]], [[SUBSWrr]], implicit-def dead $nzcv - ; CHECK: $w0 = COPY [[CSELWr]] - ; CHECK: RET_ReallyLR implicit $w0 - %1:gpr64 = COPY $x0 - %2:gpr32 = COPY $w1 - %3:fpr32 = COPY $s0 - %4:fpr32 = COPY $s1 - %26:gpr32 = COPY $wzr - %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv - FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr - %14:gpr32common = UBFMWri %12, 1, 31 - %60:gpr32 = MOVi32imm 1 - %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv - %ret:gpr32 = SUBSWrr %16, %12, implicit-def $nzcv - $w0 = COPY %16 - RET_ReallyLR implicit $w0 -... 
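Context for the test churn above and below (this aside is not part of the patch itself): the G_UADDO/G_UADDE and G_USUBO/G_USUBE chains produced when legalization narrows wide integer arithmetic now select to ADDS followed by ADCS, or SUBS followed by SBCS, keeping the carry in NZCV instead of round-tripping it through a CSINC and a compare. The i128 atomicrmw checks change accordingly, and the post-select-optimize tests gain ADCS/SBCS cases so the pass can relax them to ADC/SBC once the final flag def is dead. A minimal standalone reproducer, assuming GlobalISel is in use (e.g. at -O0), is a plain i128 add; legalization splits it into G_UADDO plus G_UADDE, which the new selection code lowers to an adds/adcs pair:

; Illustrative sketch only; the function name is arbitrary and this file is
; not part of the patch.
define i128 @add_i128(i128 %a, i128 %b) {
  %sum = add i128 %a, %b   ; narrowed to G_UADDO (low half) + G_UADDE (high half)
  ret i128 %sum            ; selected as: adds xLo, ...; adcs xHi, ... (adcs may
                           ; later be relaxed to adc when its NZCV def is dead)
}
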
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir @@ -0,0 +1,365 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s +--- +name: test_fcmp_dead_cc +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $s0, $s1 + + ; CHECK-LABEL: name: test_fcmp_dead_cc + ; CHECK: liveins: $w1, $x0, $s0, $s1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr32 = COPY $s0 + %4:fpr32 = COPY $s1 + %26:gpr32 = COPY $wzr + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_fcmp_64_dead_cc +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $d0, $d1 + + ; CHECK-LABEL: name: test_fcmp_64_dead_cc + ; CHECK: liveins: $w1, $x0, $d0, $d1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr64 = COPY $d0 + %4:fpr64 = COPY $d1 + %26:gpr32 = COPY $wzr + FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... +--- +name: test_fcmp_dead_cc_3_fcmps +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +liveins: + - { reg: '$x0' } + - { reg: '$w1' } +body: | + bb.1: + liveins: $w1, $x0, $s0, $s1 + + ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps + ; CHECK: liveins: $w1, $x0, $s0, $s1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr + ; CHECK-NEXT: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]] + ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr + ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSELWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %1:gpr64 = COPY $x0 + %2:gpr32 = COPY $w1 + %3:fpr32 = COPY $s0 + %4:fpr32 = COPY $s1 + %26:gpr32 = COPY $wzr + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr + %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv + FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr + %14:gpr32common = UBFMWri %12, 1, 31 + %60:gpr32 = MOVi32imm 1 + %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv + $w0 = COPY %16 + RET_ReallyLR implicit $w0 + +... 
+--- +name: test_impdef_subsx +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_impdef_subsx + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x0 = COPY [[SUBXrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %1:gpr64 = COPY $x0 + %2:gpr64 = COPY $x1 + %4:gpr64 = SUBSXrr %1, %2, implicit-def $nzcv + $x0 = COPY %4 + RET_ReallyLR implicit $x0 +... +--- +name: test_impdef_subsw +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1 + ; CHECK-LABEL: name: test_impdef_subsw + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $x1 + ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: $w0 = COPY [[SUBWrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %1:gpr32 = COPY $x0 + %2:gpr32 = COPY $x1 + %4:gpr32 = SUBSWrr %1, %2, implicit-def $nzcv + $w0 = COPY %4 + RET_ReallyLR implicit $w0 +... +--- +name: test_impdef_addsx +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: test_impdef_addsx + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: $x0 = COPY [[ADDXrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %1:gpr64 = COPY $x0 + %2:gpr64 = COPY $x1 + %4:gpr64 = ADDSXrr %1, %2, implicit-def $nzcv + $x0 = COPY %4 + RET_ReallyLR implicit $x0 +... +--- +name: test_impdef_addsw +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1 + ; CHECK-LABEL: name: test_impdef_addsw + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $x1 + ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]] + ; CHECK-NEXT: $w0 = COPY [[ADDWrr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %1:gpr32 = COPY $x0 + %2:gpr32 = COPY $x1 + %4:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv + $w0 = COPY %4 + RET_ReallyLR implicit $w0 +... 
+--- +name: test_impdef_adcsx +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + ; CHECK-LABEL: name: test_impdef_adcsx + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[ADCXr:%[0-9]+]]:gpr64 = ADCXr [[COPY1]], [[COPY3]], implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]] + ; CHECK-NEXT: $x1 = COPY [[ADCXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %1:gpr64 = COPY $x0 + %2:gpr64 = COPY $x1 + %3:gpr64 = COPY $x2 + %4:gpr64 = COPY $x3 + %5:gpr64 = ADDSXrr %1, %3, implicit-def $nzcv + %6:gpr64 = ADCSXr %2, %4, implicit-def $nzcv, implicit $nzcv + $x0 = COPY %5 + $x1 = COPY %6 + RET_ReallyLR implicit $x0, implicit $x1 +... +--- +name: test_impdef_adcsw +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1, $w2, $w3 + ; CHECK-LABEL: name: test_impdef_adcsw + ; CHECK: liveins: $w0, $w1, $w2, $w3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3 + ; CHECK-NEXT: [[ADDSWrr:%[0-9]+]]:gpr32 = ADDSWrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[ADCWr:%[0-9]+]]:gpr32 = ADCWr [[COPY1]], [[COPY3]], implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[ADDSWrr]] + ; CHECK-NEXT: $w1 = COPY [[ADCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + %1:gpr32 = COPY $w0 + %2:gpr32 = COPY $w1 + %3:gpr32 = COPY $w2 + %4:gpr32 = COPY $w3 + %5:gpr32 = ADDSWrr %1, %3, implicit-def $nzcv + %6:gpr32 = ADCSWr %2, %4, implicit-def $nzcv, implicit $nzcv + $w0 = COPY %5 + $w1 = COPY %6 + RET_ReallyLR implicit $w0, implicit $w1 +... +--- +name: test_impdef_sbcsx +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + ; CHECK-LABEL: name: test_impdef_sbcsx + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCXr:%[0-9]+]]:gpr64 = SBCXr [[COPY1]], [[COPY3]], implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]] + ; CHECK-NEXT: $x1 = COPY [[SBCXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %1:gpr64 = COPY $x0 + %2:gpr64 = COPY $x1 + %3:gpr64 = COPY $x2 + %4:gpr64 = COPY $x3 + %5:gpr64 = SUBSXrr %1, %3, implicit-def $nzcv + %6:gpr64 = SBCSXr %2, %4, implicit-def $nzcv, implicit $nzcv + $x0 = COPY %5 + $x1 = COPY %6 + RET_ReallyLR implicit $x0, implicit $x1 +... 
+--- +name: test_impdef_sbcsw +alignment: 4 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1, $w2, $w3 + ; CHECK-LABEL: name: test_impdef_sbcsw + ; CHECK: liveins: $w0, $w1, $w2, $w3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCWr:%[0-9]+]]:gpr32 = SBCWr [[COPY1]], [[COPY3]], implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]] + ; CHECK-NEXT: $w1 = COPY [[SBCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + %1:gpr32 = COPY $w0 + %2:gpr32 = COPY $w1 + %3:gpr32 = COPY $w2 + %4:gpr32 = COPY $w3 + %5:gpr32 = SUBSWrr %1, %3, implicit-def $nzcv + %6:gpr32 = SBCSWr %2, %4, implicit-def $nzcv, implicit $nzcv + $w0 = COPY %5 + $w1 = COPY %6 + RET_ReallyLR implicit $w0, implicit $w1 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir @@ -0,0 +1,177 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s +... +--- +name: sadde_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: sadde_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADCSXr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s64), %4:gpr(s32) = G_SADDE %0, %1, %2 + $x0 = COPY %3(s64) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $x0, implicit $w1 +... +... 
+--- +name: sadde_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: sadde_s32 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32 = ADCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[ADCSWr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32), %4:gpr(s32) = G_SADDE %0, %1, %2 + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $w0, implicit $w1 +... +... +--- +name: sadde_opt_prev_uaddo +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: sadde_opt_prev_uaddo + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]] + ; CHECK-NEXT: $x1 = COPY [[ADCSXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %8:gpr(s64), %12:gpr(s32) = G_UADDO %0, %2 + %9:gpr(s64), %13:gpr(s32) = G_SADDE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... +--- +name: sadde_opt_prev_uadde +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: sadde_opt_prev_uadde + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADCSXr]] + ; CHECK-NEXT: $x1 = COPY [[ADCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %6:gpr(s32) = G_CONSTANT i32 1 + %8:gpr(s64), %12:gpr(s32) = G_UADDE %0, %2, %6 + %9:gpr(s64), %13:gpr(s32) = G_SADDE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... 
+--- +name: sadde_opt_bail_clobber +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x4, $x5, $x6 + + ; CHECK-LABEL: name: sadde_opt_bail_clobber + ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6 + ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[CSINCWr]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]] + ; CHECK-NEXT: $x1 = COPY [[ADCSXr]] + ; CHECK-NEXT: $x2 = COPY [[ADCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %4:gpr(s64) = COPY $x4 + %5:gpr(s64) = COPY $x5 + %6:gpr(s64) = COPY $x6 + %7:gpr(s64), %11:gpr(s32) = G_UADDO %0, %4 + %8:gpr(s64), %12:gpr(s32) = G_UADDE %1, %5, %11 + ; carry-in is not produced by previous instruction + %9:gpr(s64), %13:gpr(s32) = G_SADDE %2, %6, %11 + $x0 = COPY %7(s64) + $x1 = COPY %8(s64) + $x2 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir @@ -74,7 +74,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 @@ -101,7 +100,6 @@ ; CHECK-NEXT: %copy1:gpr32 = COPY $w0 ; CHECK-NEXT: %copy2:gpr32 = COPY $w1 ; CHECK-NEXT: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy1:gpr(s32) = COPY $w0 @@ -129,7 +127,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 @@ -155,7 +152,6 @@ ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0 ; CHECK-NEXT: %reg1:gpr32 = COPY $w0 ; CHECK-NEXT: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv - ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $x0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $x0 %reg0:gpr(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir @@ -0,0 +1,177 @@ +# NOTE: Assertions have been 
autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s +... +--- +name: ssube_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: ssube_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SBCSXr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s64), %4:gpr(s32) = G_SSUBE %0, %1, %2 + $x0 = COPY %3(s64) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $x0, implicit $w1 +... +... +--- +name: ssube_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: ssube_s32 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32 = SBCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[SBCSWr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32), %4:gpr(s32) = G_SSUBE %0, %1, %2 + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $w0, implicit $w1 +... +... +--- +name: ssube_opt_prev_usubo +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: ssube_opt_prev_usubo + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]] + ; CHECK-NEXT: $x1 = COPY [[SBCSXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %8:gpr(s64), %12:gpr(s32) = G_USUBO %0, %2 + %9:gpr(s64), %13:gpr(s32) = G_SSUBE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... 
+--- +name: ssube_opt_prev_usube +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: ssube_opt_prev_usube + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SBCSXr]] + ; CHECK-NEXT: $x1 = COPY [[SBCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %6:gpr(s32) = G_CONSTANT i32 1 + %8:gpr(s64), %12:gpr(s32) = G_USUBE %0, %2, %6 + %9:gpr(s64), %13:gpr(s32) = G_SSUBE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... +--- +name: ssube_opt_bail_clobber +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x4, $x5, $x6 + + ; CHECK-LABEL: name: ssube_opt_bail_clobber + ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[CSINCWr]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]] + ; CHECK-NEXT: $x1 = COPY [[SBCSXr]] + ; CHECK-NEXT: $x2 = COPY [[SBCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %4:gpr(s64) = COPY $x4 + %5:gpr(s64) = COPY $x5 + %6:gpr(s64) = COPY $x6 + %7:gpr(s64), %11:gpr(s32) = G_USUBO %0, %4 + %8:gpr(s64), %12:gpr(s32) = G_USUBE %1, %5, %11 + ; carry-in is not produced by previous instruction + %9:gpr(s64), %13:gpr(s32) = G_SSUBE %2, %6, %11 + $x0 = COPY %7(s64) + $x1 = COPY %8(s64) + $x2 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 +... 
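The subtract-side tests mirror the add side, with one twist visible in the checks above: SBCS consumes an inverted borrow in the C flag, so a materialized carry-in is negated with a "SUBSWrr $wzr, <carry>" sequence rather than the "SUBSWri <carry>, 1" used before ADCS. A minimal reproducer for this path, again only an illustrative sketch and not part of the patch, is a plain i128 subtraction; legalization narrows it to G_USUBO plus G_USUBE, which now selects to a subs/sbcs pair (relaxed to sbc by the post-select-optimize pass when the final flags are dead):

; Illustrative sketch only; the function name is arbitrary.
define i128 @sub_i128(i128 %a, i128 %b) {
  %diff = sub i128 %a, %b  ; narrowed to G_USUBO (low half) + G_USUBE (high half)
  ret i128 %diff           ; selected as: subs xLo, ...; sbcs xHi, ...
}
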
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir @@ -74,7 +74,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 @@ -101,7 +100,6 @@ ; CHECK-NEXT: %copy1:gpr32 = COPY $w0 ; CHECK-NEXT: %copy2:gpr32 = COPY $w1 ; CHECK-NEXT: %add:gpr32 = SUBSWrs %copy1, %copy2, 16, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy1:gpr(s32) = COPY $w0 @@ -129,7 +127,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 @@ -155,7 +152,6 @@ ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0 ; CHECK-NEXT: %reg1:gpr32 = COPY $w0 ; CHECK-NEXT: %add:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv - ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv ; CHECK-NEXT: $x0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $x0 %reg0:gpr(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir @@ -0,0 +1,177 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s +... +--- +name: uadde_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: uadde_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADCSXr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s64), %4:gpr(s32) = G_UADDE %0, %1, %2 + $x0 = COPY %3(s64) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $x0, implicit $w1 +... +...
+--- +name: uadde_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: uadde_s32 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32 = ADCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[ADCSWr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32), %4:gpr(s32) = G_UADDE %0, %1, %2 + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $w0, implicit $w1 +... +... +--- +name: uadde_opt_prev_uaddo +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: uadde_opt_prev_uaddo + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]] + ; CHECK-NEXT: $x1 = COPY [[ADCSXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %8:gpr(s64), %12:gpr(s32) = G_UADDO %0, %2 + %9:gpr(s64), %13:gpr(s32) = G_UADDE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... +--- +name: uadde_opt_prev_uadde +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: uadde_opt_prev_uadde + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADCSXr]] + ; CHECK-NEXT: $x1 = COPY [[ADCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %6:gpr(s32) = G_CONSTANT i32 1 + %8:gpr(s64), %12:gpr(s32) = G_UADDE %0, %2, %6 + %9:gpr(s64), %13:gpr(s32) = G_UADDE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... 
+--- +name: uadde_opt_bail_clobber +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x4, $x5, $x6 + + ; CHECK-LABEL: name: uadde_opt_bail_clobber + ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6 + ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[CSINCWr]], 1, 0, implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]] + ; CHECK-NEXT: $x1 = COPY [[ADCSXr]] + ; CHECK-NEXT: $x2 = COPY [[ADCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %4:gpr(s64) = COPY $x4 + %5:gpr(s64) = COPY $x5 + %6:gpr(s64) = COPY $x6 + %7:gpr(s64), %11:gpr(s32) = G_UADDO %0, %4 + %8:gpr(s64), %12:gpr(s32) = G_UADDE %1, %5, %11 + ; carry-in is not produced by previous instruction + %9:gpr(s64), %13:gpr(s32) = G_UADDE %2, %6, %11 + $x0 = COPY %7(s64) + $x1 = COPY %8(s64) + $x2 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir @@ -74,7 +74,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 @@ -101,7 +100,6 @@ ; CHECK-NEXT: %copy1:gpr32 = COPY $w0 ; CHECK-NEXT: %copy2:gpr32 = COPY $w1 ; CHECK-NEXT: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy1:gpr(s32) = COPY $w0 @@ -129,7 +127,6 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 @@ -155,7 +152,6 @@ ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0 ; CHECK-NEXT: %reg1:gpr32 = COPY $w0 ; CHECK-NEXT: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv - ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv ; CHECK-NEXT: $x0 = COPY %add ; CHECK-NEXT: RET_ReallyLR implicit $x0 %reg0:gpr(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir @@ -0,0 +1,177 @@ +# NOTE: Assertions have been 
autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s +... +--- +name: usube_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: usube_s64 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SBCSXr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s64), %4:gpr(s32) = G_USUBE %0, %1, %2 + $x0 = COPY %3(s64) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $x0, implicit $w1 +... +... +--- +name: usube_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: usube_s32 + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32 = SBCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[SBCSWr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + %0:gpr(s32) = COPY $w0 + %1:gpr(s32) = COPY $w1 + %2:gpr(s32) = G_CONSTANT i32 1 + %3:gpr(s32), %4:gpr(s32) = G_USUBE %0, %1, %2 + $w0 = COPY %3(s32) + $w1 = COPY %4(s32) + RET_ReallyLR implicit $w0, implicit $w1 +... +... +--- +name: usube_opt_prev_usubo +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: usube_opt_prev_usubo + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]] + ; CHECK-NEXT: $x1 = COPY [[SBCSXr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %8:gpr(s64), %12:gpr(s32) = G_USUBO %0, %2 + %9:gpr(s64), %13:gpr(s32) = G_USUBE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +...
+--- +name: usube_opt_prev_usube +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: usube_opt_prev_usube + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SBCSXr]] + ; CHECK-NEXT: $x1 = COPY [[SBCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %3:gpr(s64) = COPY $x3 + %6:gpr(s32) = G_CONSTANT i32 1 + %8:gpr(s64), %12:gpr(s32) = G_USUBE %0, %2, %6 + %9:gpr(s64), %13:gpr(s32) = G_USUBE %1, %3, %12 + $x0 = COPY %8(s64) + $x1 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1 +... +... +--- +name: usube_opt_bail_clobber +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0, $x1, $x2, $x4, $x5, $x6 + + ; CHECK-LABEL: name: usube_opt_bail_clobber + ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5 + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY3]], implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[CSINCWr]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]] + ; CHECK-NEXT: $x1 = COPY [[SBCSXr]] + ; CHECK-NEXT: $x2 = COPY [[SBCSXr1]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 + %0:gpr(s64) = COPY $x0 + %1:gpr(s64) = COPY $x1 + %2:gpr(s64) = COPY $x2 + %4:gpr(s64) = COPY $x4 + %5:gpr(s64) = COPY $x5 + %6:gpr(s64) = COPY $x6 + %7:gpr(s64), %11:gpr(s32) = G_USUBO %0, %4 + %8:gpr(s64), %12:gpr(s32) = G_USUBE %1, %5, %11 + ; carry-in is not produced by previous instruction + %9:gpr(s64), %13:gpr(s32) = G_USUBE %2, %6, %11 + $x0 = COPY %7(s64) + $x1 = COPY %8(s64) + $x2 = COPY %9(s64) + RET_ReallyLR implicit $x0, implicit $x1, implicit $x2 +... 
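For context: usube_opt_prev_usubo and usube_opt_prev_usube above mirror the sequence produced when a plain wide subtraction is narrowed to 64-bit limbs. A hypothetical LLVM IR input (illustrative assumption, not taken from the patch):

  define i128 @sub_i128(i128 %a, i128 %b) {
    ; A 128-bit G_SUB is expected to legalize to a low-limb G_USUBO followed by
    ; a high-limb G_USUBE; the tests above select this to SUBS + SBCS, keeping
    ; the borrow in NZCV instead of materializing it in a register.
    %d = sub i128 %a, %b
    ret i128 %d
  }

The bail_clobber case covers the opposite situation: when the carry-in is not defined by the immediately preceding instruction, a SUBS of the materialized carry value is emitted to restore NZCV before the SBCS.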
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir @@ -19,13 +19,15 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv - ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] - ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 %0:gpr(s32) = COPY $w0 %1:gpr(s32) = COPY $w1 %3:gpr(s32), %4:gpr(s32) = G_USUBO %0, %1 - $w0 = COPY %4(s32) - RET_ReallyLR implicit $w0 + $w0 = COPY %3 + $w1 = COPY %4 + RET_ReallyLR implicit $w0, implicit $w1 ... --- @@ -45,13 +47,15 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv - ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] - ; CHECK-NEXT: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]] + ; CHECK-NEXT: $w1 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1 %0:gpr(s64) = COPY $x0 %1:gpr(s64) = COPY $x1 %3:gpr(s64), %4:gpr(s32) = G_USUBO %0, %1 - $w0 = COPY %4 - RET_ReallyLR implicit $w0 + $x0 = COPY %3 + $w1 = COPY %4 + RET_ReallyLR implicit $x0, implicit $w1 ... --- @@ -70,16 +74,13 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add - ; CHECK-NEXT: $w1 = COPY %overflow - ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 %constant:gpr(s32) = G_CONSTANT i32 16 %add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy, %constant $w0 = COPY %add - $w1 = COPY %overflow - RET_ReallyLR implicit $w0, implicit $w1 + RET_ReallyLR implicit $w0 ... --- @@ -99,18 +100,15 @@ ; CHECK-NEXT: %copy1:gpr32 = COPY $w0 ; CHECK-NEXT: %copy2:gpr32 = COPY $w1 ; CHECK-NEXT: %add:gpr32 = SUBSWrs %copy1, %copy2, 16, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add - ; CHECK-NEXT: $w1 = COPY %overflow - ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy1:gpr(s32) = COPY $w0 %copy2:gpr(s32) = COPY $w1 %constant:gpr(s32) = G_CONSTANT i32 16 %shift:gpr(s32) = G_SHL %copy2(s32), %constant(s32) %add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy1, %shift $w0 = COPY %add(s32) - $w1 = COPY %overflow(s32) - RET_ReallyLR implicit $w0, implicit $w1 + RET_ReallyLR implicit $w0 ... 
--- @@ -129,16 +127,13 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %copy:gpr32sp = COPY $w0 ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv - ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv ; CHECK-NEXT: $w0 = COPY %add - ; CHECK-NEXT: $w1 = COPY %overflow - ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1 + ; CHECK-NEXT: RET_ReallyLR implicit $w0 %copy:gpr(s32) = COPY $w0 %constant:gpr(s32) = G_CONSTANT i32 -16 %add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy, %constant $w0 = COPY %add(s32) - $w1 = COPY %overflow(s32) - RET_ReallyLR implicit $w0, implicit $w1 + RET_ReallyLR implicit $w0 ... --- @@ -157,10 +152,8 @@ ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0 ; CHECK-NEXT: %reg1:gpr32 = COPY $w0 ; CHECK-NEXT: %add:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv - ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv ; CHECK-NEXT: $x0 = COPY %add - ; CHECK-NEXT: $w1 = COPY %flags - ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1 + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %reg0:gpr(s64) = COPY $x0 %reg1:gpr(s32) = COPY $w0 %ext:gpr(s64) = G_ZEXT %reg1(s32) @@ -168,5 +161,4 @@ %shift:gpr(s64) = G_SHL %ext, %cst(s64) %add:gpr(s64), %flags:gpr(s32) = G_USUBO %reg0, %shift $x0 = COPY %add(s64) - $w1 = COPY %flags - RET_ReallyLR implicit $x0, implicit $w1 + RET_ReallyLR implicit $x0 diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll --- a/llvm/test/CodeGen/AArch64/popcount.ll +++ b/llvm/test/CodeGen/AArch64/popcount.ll @@ -57,7 +57,6 @@ ; CHECK-NEXT: // kill: def $x10 killed $w10 ; CHECK-NEXT: bfi x8, x10, #32, #32 ; CHECK-NEXT: adds x8, x8, x9 -; CHECK-NEXT: cset w9, hs ; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret Entry: