Index: llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def =================================================================== --- llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -29,6 +29,8 @@ {0, 32, AArch64::GPRRegBank}, // 7: GPR 64-bit value. {0, 64, AArch64::GPRRegBank}, + // 8: GPR 128-bit value. + {0, 128, AArch64::GPRRegBank}, }; // ValueMappings. @@ -66,51 +68,55 @@ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 22: GPR 64-bit value. <-- This must match Last3OpsIdx. + // 22: GPR 64-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, + // 25: GPR 128-bit value. <-- This must match Last3OpsIdx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, // Cross register bank copies. - // 25: FPR 16-bit value to GPR 16-bit. <-- This must match + // 28: FPR 16-bit value to GPR 16-bit. <-- This must match // FirstCrossRegCpyIdx. // Note: This is the kind of copy we see with physical registers. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 27: FPR 32-bit value to GPR 32-bit value. + // 30: FPR 32-bit value to GPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 29: FPR 64-bit value to GPR 64-bit value. + // 32: FPR 64-bit value to GPR 64-bit value. 
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - // 31: FPR 128-bit value to GPR 128-bit value (invalid) + // 34: FPR 128-bit value to GPR 128-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 33: FPR 256-bit value to GPR 256-bit value (invalid) + // 36: FPR 256-bit value to GPR 256-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 35: FPR 512-bit value to GPR 512-bit value (invalid) + // 38: FPR 512-bit value to GPR 512-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 37: GPR 32-bit value to FPR 32-bit value. + // 40: GPR 32-bit value to FPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 39: GPR 64-bit value to FPR 64-bit value. <-- This must match + // 42: GPR 64-bit value to FPR 64-bit value. <-- This must match // LastCrossRegCpyIdx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 41: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. + // 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 43: FPExt: 16 to 32. <-- This must match FPExt16To64Idx. + // 46: FPExt: 16 to 64. <-- This must match FPExt16To64Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 45: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. + // 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx. + // 50: FPExt vector: 64 to 128. 
<-- This must match FPExt64To128Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 49: Shift scalar with 64 bit shift imm + // 52: Shift scalar with 64 bit shift imm {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, @@ -167,6 +173,8 @@ return 0; if (Size <= 64) return 1; + if (Size <= 128) + return 2; return -1; } if (RBIdx == PMI_FirstFPR) { Index: llvm/lib/Target/AArch64/AArch64InstrGISel.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -217,3 +217,8 @@ def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new), (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>; } + +def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr), + (STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; +def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr), + (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; Index: llvm/lib/Target/AArch64/AArch64RegisterBanks.td =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterBanks.td +++ llvm/lib/Target/AArch64/AArch64RegisterBanks.td @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// /// General Purpose Registers: W, X. -def GPRRegBank : RegisterBank<"GPR", [GPR64all]>; +def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>; /// Floating Point/Vector Registers: B, H, S, D, Q. 
def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -489,6 +489,8 @@ if (Ty.getSizeInBits() == 64) return GetAllRegSet ? &AArch64::GPR64allRegClass : &AArch64::GPR64RegClass; + if (Ty.getSizeInBits() == 128) + return &AArch64::XSeqPairsClassRegClass; return nullptr; } @@ -521,6 +523,8 @@ if (SizeInBits == 64) return GetAllRegSet ? &AArch64::GPR64allRegClass : &AArch64::GPR64RegClass; + if (SizeInBits == 128) + return &AArch64::XSeqPairsClassRegClass; } if (RegBankID == AArch64::FPRRegBankID) { @@ -2518,19 +2522,25 @@ if (DstTy.getSizeInBits() != 64) return false; + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 64 != 0) + return false; + + // Check we have the right regbank always. const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); - // Check we have the right regbank always. - assert(SrcRB.getID() == AArch64::FPRRegBankID && - DstRB.getID() == AArch64::FPRRegBankID && - "Wrong extract regbank!"); - (void)SrcRB; + assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); + + if (SrcRB.getID() == AArch64::GPRRegBankID && + DstRB.getID() == AArch64::GPRRegBankID) { + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); + I.eraseFromParent(); + return true; + } // Emit the same code as a vector extract. // Offset must be a multiple of 64. 
- unsigned Offset = I.getOperand(2).getImm(); - if (Offset % 64 != 0) - return false; unsigned LaneIdx = Offset / 64; MachineInstr *Extract = emitExtractVectorElt( DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); @@ -4947,6 +4957,15 @@ switch (IntrinID) { default: return false; + case Intrinsic::aarch64_ldxp: + case Intrinsic::aarch64_ldaxp: { + auto NewI = MIB.buildInstr( + IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, + {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, + {I.getOperand(3)}); + NewI.cloneMemRefs(I); + break; + } case Intrinsic::trap: MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); break; Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -54,6 +54,8 @@ LegalizerHelper &Helper) const; bool legalizeCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. 
Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "AArch64RegisterBankInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -498,14 +499,19 @@ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) .lowerIf( - all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0))); + all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0))); + + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) + .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))) + .customIf([](const LegalityQuery &Query) { + return Query.Types[0].getSizeInBits() == 128; + }); getActionDefinitionsBuilder( {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, - G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG}) - .legalIf(all( - typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); + G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) + .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); @@ -760,6 +766,8 @@ return legalizeRotate(MI, MRI, Helper); case TargetOpcode::G_CTPOP: return legalizeCTPOP(MI, MRI, Helper); + case TargetOpcode::G_ATOMIC_CMPXCHG: + return legalizeAtomicCmpxchg128(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); @@ -1048,3 +1056,82 @@ MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128( + MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + LLT s64 = 
LLT::scalar(64); + auto Addr = MI.getOperand(1).getReg(); + auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2)); + auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3)); + auto DstLo = MRI.createGenericVirtualRegister(s64); + auto DstHi = MRI.createGenericVirtualRegister(s64); + + MachineInstrBuilder CAS; + if (ST->hasLSE()) { + // We have 128-bit CASP instructions taking XSeqPair registers, which are + // s128. We need the merge/unmerge to bracket the expansion and pair up with + // the rest of the MIR so we must reassemble the extracted registers into a + // 128-bit known-regclass one with code like this: + // + // %in1 = REG_SEQUENCE Lo, Hi ; One for each input + // %out = CASP %in1, ... + // %OldLo = G_EXTRACT %out, 0 + // %OldHi = G_EXTRACT %out, 64 + auto Ordering = (*MI.memoperands_begin())->getOrdering(); + unsigned Opcode; + switch (Ordering) { + case AtomicOrdering::Acquire: + Opcode = AArch64::CASPAX; + break; + case AtomicOrdering::Release: + Opcode = AArch64::CASPLX; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + Opcode = AArch64::CASPALX; + break; + default: + Opcode = AArch64::CASPX; + break; + } + + LLT s128 = LLT::scalar(128); + auto CASDst = MRI.createGenericVirtualRegister(s128); + auto CASDesired = MRI.createGenericVirtualRegister(s128); + auto CASNew = MRI.createGenericVirtualRegister(s128); + MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {}) + .addUse(DesiredI->getOperand(0).getReg()) + .addImm(AArch64::sube64) + .addUse(DesiredI->getOperand(1).getReg()) + .addImm(AArch64::subo64); + MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {}) + .addUse(NewI->getOperand(0).getReg()) + .addImm(AArch64::sube64) + .addUse(NewI->getOperand(1).getReg()) + .addImm(AArch64::subo64); + + CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr}); + + MIRBuilder.buildExtract({DstLo}, {CASDst}, 0); + MIRBuilder.buildExtract({DstHi}, 
{CASDst}, 64); + } else { + // The -O0 CMP_SWAP_128 is friendlier to generate code for because LDXP/STXP + // can take arbitrary registers so it just has the normal GPR64 operands the + // rest of AArch64 is expecting. + auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + CAS = MIRBuilder.buildInstr(AArch64::CMP_SWAP_128, {DstLo, DstHi, Scratch}, + {Addr, DesiredI->getOperand(0), + DesiredI->getOperand(1), NewI->getOperand(0), + NewI->getOperand(1)}); + } + + CAS.cloneMemRefs(MI); + constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(), + *MRI.getTargetRegisterInfo(), + *ST->getRegBankInfo()); + + MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi}); + MI.eraseFromParent(); + return true; +} Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -34,8 +34,9 @@ PMI_FPR512, PMI_GPR32, PMI_GPR64, + PMI_GPR128, PMI_FirstGPR = PMI_GPR32, - PMI_LastGPR = PMI_GPR64, + PMI_LastGPR = PMI_GPR128, PMI_FirstFPR = PMI_FPR16, PMI_LastFPR = PMI_FPR512, PMI_Min = PMI_FirstFPR, @@ -48,16 +49,16 @@ enum ValueMappingIdx { InvalidIdx = 0, First3OpsIdx = 1, - Last3OpsIdx = 22, + Last3OpsIdx = 25, DistanceBetweenRegBanks = 3, - FirstCrossRegCpyIdx = 25, - LastCrossRegCpyIdx = 39, + FirstCrossRegCpyIdx = 28, + LastCrossRegCpyIdx = 42, DistanceBetweenCrossRegCpy = 2, - FPExt16To32Idx = 41, - FPExt16To64Idx = 43, - FPExt32To64Idx = 45, - FPExt64To128Idx = 47, - Shift64Imm = 49 + FPExt16To32Idx = 44, + FPExt16To64Idx = 46, + FPExt32To64Idx = 48, + FPExt64To128Idx = 50, + Shift64Imm = 52, }; static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx, Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ 
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -69,7 +69,7 @@ // GR64all + its subclasses. assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) && "Subclass not added?"); - assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit"); + assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit"); // The FPR register bank is fully defined by all the registers in // GR64all + its subclasses. @@ -87,7 +87,7 @@ // Check that the TableGen'ed like file is in sync we our expectations. // First, the Idx. assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, - {PMI_GPR32, PMI_GPR64}) && + {PMI_GPR32, PMI_GPR64, PMI_GPR128}) && "PartialMappingIdx's are incorrectly ordered"); assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, @@ -104,6 +104,7 @@ CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); + CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR); CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); @@ -124,6 +125,7 @@ CHECK_VALUEMAP(GPR, 32); CHECK_VALUEMAP(GPR, 64); + CHECK_VALUEMAP(GPR, 128); CHECK_VALUEMAP(FPR, 16); CHECK_VALUEMAP(FPR, 32); CHECK_VALUEMAP(FPR, 64); @@ -142,6 +144,7 @@ CHECK_VALUEMAP_3OPS(GPR, 32); CHECK_VALUEMAP_3OPS(GPR, 64); + CHECK_VALUEMAP_3OPS(GPR, 128); CHECK_VALUEMAP_3OPS(FPR, 32); CHECK_VALUEMAP_3OPS(FPR, 64); CHECK_VALUEMAP_3OPS(FPR, 128); @@ -871,12 +874,16 @@ OpRegBankIdx[3] = PMI_FirstGPR; break; case TargetOpcode::G_EXTRACT: { - // For s128 sources we have to use fpr. + // For s128 sources we have to use fpr unless we know otherwise. 
+ auto Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - if (SrcTy.getSizeInBits() == 128) { - OpRegBankIdx[0] = PMI_FirstFPR; - OpRegBankIdx[1] = PMI_FirstFPR; - } + if (SrcTy.getSizeInBits() != 128) + break; + auto Idx = MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass + ? PMI_FirstGPR + : PMI_FirstFPR; + OpRegBankIdx[0] = Idx; + OpRegBankIdx[1] = Idx; break; } case TargetOpcode::G_BUILD_VECTOR: { Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O1 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=apple-a13 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O1 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O0 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -mcpu=apple-a13 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O0 +@var = global i128 0 + +define void @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LLSC-O1-LABEL: val_compare_and_swap: +; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0] +; [... LOTS of stuff that is generic IR unrelated to atomic operations ...] 
+; CHECK-LLSC-O1: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0] +; +; CHECK-CAS-O1-LABEL: val_compare_and_swap: +; CHECK-CAS-O1: caspa x2, x3, x4, x5, [x0] +; CHECK-CAS-O1: mov v[[OLD:[0-9]+]].d[0], x2 +; CHECK-CAS-O1: mov v[[OLD]].d[1], x3 +; CHECK-CAS-O1: str q[[OLD]], [x0] + +; CHECK-LLSC-O0-LABEL: val_compare_and_swap: +; CHECK-LLSC-O0: .LBB0_1: +; CHECK-LLSC-O0: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK-LLSC-O0: cmp [[OLD_LO]], x2 +; CHECK-LLSC-O0: cset [[EQUAL_TMP:w[0-9]+]], ne +; CHECK-LLSC-O0: cmp [[OLD_HI]], x3 +; CHECK-LLSC-O0: cinc [[EQUAL:w[0-9]+]], [[EQUAL_TMP]], ne +; CHECK-LLSC-O0: cbnz [[EQUAL]], .LBB0_3 +; CHECK-LLSC-O0: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0] +; CHECK-LLSC-O0: cbnz [[STATUS]], .LBB0_1 +; CHECK-LLSC-O0: .LBB0_3: +; CHECK-LLSC-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]] +; CHECK-LLSC-O0: mov v[[OLD]].d[1], [[OLD_HI]] +; CHECK-LLSC-O0: str q[[OLD]], [x0] + + +; CHECK-CAS-O0-LABEL: val_compare_and_swap: +; CHECK-CAS-O0: str x3, [sp, #[[SLOT:[0-9]+]]] +; CHECK-CAS-O0: mov [[NEW_HI_TMP:x[0-9]+]], x5 +; CHECK-CAS-O0: ldr [[DESIRED_HI_TMP:x[0-9]+]], [sp, #[[SLOT]]] +; CHECK-CAS-O0: mov [[DESIRED_HI:x[0-9]+]], [[DESIRED_HI_TMP]] +; CHECK-CAS-O0: mov [[NEW_HI:x[0-9]+]], [[NEW_HI_TMP]] +; CHECK-CAS-O0: caspa x2, [[DESIRED_HI]], x4, [[NEW_HI]], [x0] +; CHECK-CAS-O0: mov [[OLD_LO:x[0-9]+]], x2 +; CHECK-CAS-O0: mov [[OLD_HI:x[0-9]+]], x3 +; CHECK-CAS-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]] +; CHECK-CAS-O0: mov v[[OLD]].d[1], [[OLD_HI]] +; CHECK-CAS-O0: str q[[OLD]], [x0] + +%pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + %val = extractvalue { i128, i1 } %pair, 0 + store i128 %val, i128* %p + ret void +} Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir @@ -0,0 +1,73 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py +# RUN: llc -mtriple=arm64-apple-ios -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=CHECK-NOLSE +# RUN: llc -mtriple=arm64-apple-ios -mcpu=apple-a13 -global-isel -run-pass=legalizer %s -o - | FileCheck %s --check-prefix=CHECK-LSE + +--- +name: compare_swap_128 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0_x1, $x1 + + liveins: $x0, $x1, $x2, $x3, $x4 + + ; CHECK-LABEL: name: compare_swap_128 + ; CHECK: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 + ; CHECK: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64) + ; CHECK: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64) + ; CHECK: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64) + ; CHECK: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64) + ; CHECK: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16) + ; CHECK: [[COPY9:%[0-9]+]]:gpr64 = COPY %16 + ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64) + ; CHECK: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK: RET_ReallyLR + ; CHECK-NOLSE-LABEL: name: compare_swap_128 + ; CHECK-NOLSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 + ; CHECK-NOLSE: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0 + ; CHECK-NOLSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-NOLSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NOLSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-NOLSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK-NOLSE: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64) + ; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64) + ; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = 
COPY [[COPY3]](s64) + ; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64) + ; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32 = CMP_SWAP_128 [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire 16) + ; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16 + ; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64) + ; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK-NOLSE: RET_ReallyLR + ; CHECK-LSE-LABEL: name: compare_swap_128 + ; CHECK-LSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4 + ; CHECK-LSE: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0 + ; CHECK-LSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 + ; CHECK-LSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-LSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 + ; CHECK-LSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4 + ; CHECK-LSE: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64 + ; CHECK-LSE: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64 + ; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire 16) + ; CHECK-LSE: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0 + ; CHECK-LSE: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64 + ; CHECK-LSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64) + ; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store 16) + ; CHECK-LSE: RET_ReallyLR + %0:_(p0) = COPY $x0 + %3:_(s64) = COPY $x1 + %4:_(s64) = COPY $x2 + %1:_(s128) = G_MERGE_VALUES %3(s64), %4(s64) + %5:_(s64) = COPY $x3 + %6:_(s64) = COPY $x4 + %2:_(s128) = G_MERGE_VALUES %5(s64), %6(s64) + %7:_(s128), %8:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p0), %1, %2 :: (load store acquire acquire 16) + G_STORE 
%7(s128), %0(p0) :: (store 16) + RET_ReallyLR + +... Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -180,7 +180,6 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMIC_CMPXCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMICRMW_XCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices Index: llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/regbank-extract.mir @@ -18,6 +18,27 @@ %0:_(s128) = COPY $q0 %1:_(s64) = G_EXTRACT %0(s128), 0 $d2 = COPY %1(s64) + RET_ReallyLR implicit $d2 ... +--- +name: extract_s64_s128_gpr +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1: + liveins: $x0_x1, $x1 + + ; CHECK-LABEL: name: extract_s64_s128_gpr + ; CHECK: liveins: $x0_x1, $x1 + ; CHECK: [[CASPX:%[0-9]+]]:xseqpairsclass(s128) = CASPX $x0_x1, $x0_x1, $x0 + ; CHECK: [[EXTRACT:%[0-9]+]]:gpr(s64) = G_EXTRACT [[CASPX]](s128), 0 + ; CHECK: RET_ReallyLR + %0:xseqpairsclass = CASPX $x0_x1, $x0_x1, $x0 + %1:_(s64) = G_EXTRACT %0:xseqpairsclass(s128), 0 + + RET_ReallyLR + +...