Index: llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def =================================================================== --- llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def +++ llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def @@ -29,6 +29,8 @@ {0, 32, AArch64::GPRRegBank}, // 7: GPR 64-bit value. {0, 64, AArch64::GPRRegBank}, + // 8: GPR 128-bit value. + {0, 128, AArch64::GPRRegBank}, }; // ValueMappings. @@ -66,51 +68,55 @@ {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 22: GPR 64-bit value. <-- This must match Last3OpsIdx. + // 22: GPR 64-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, + // 25: GPR 128-bit value. <-- This must match Last3OpsIdx. + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, + {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR128 - PMI_Min], 1}, // Cross register bank copies. - // 25: FPR 16-bit value to GPR 16-bit. <-- This must match + // 28: FPR 16-bit value to GPR 16-bit. <-- This must match // FirstCrossRegCpyIdx. // Note: This is the kind of copy we see with physical registers. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 27: FPR 32-bit value to GPR 32-bit value. + // 30: FPR 32-bit value to GPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, - // 29: FPR 64-bit value to GPR 64-bit value. + // 32: FPR 64-bit value to GPR 64-bit value. 
{&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, - // 31: FPR 128-bit value to GPR 128-bit value (invalid) + // 34: FPR 128-bit value to GPR 128-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 33: FPR 256-bit value to GPR 256-bit value (invalid) + // 36: FPR 256-bit value to GPR 256-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 35: FPR 512-bit value to GPR 512-bit value (invalid) + // 38: FPR 512-bit value to GPR 512-bit value (invalid) {nullptr, 1}, {nullptr, 1}, - // 37: GPR 32-bit value to FPR 32-bit value. + // 40: GPR 32-bit value to FPR 32-bit value. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 39: GPR 64-bit value to FPR 64-bit value. <-- This must match + // 42: GPR 64-bit value to FPR 64-bit value. <-- This must match // LastCrossRegCpyIdx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 41: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. + // 44: FPExt: 16 to 32. <-- This must match FPExt16To32Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 43: FPExt: 16 to 32. <-- This must match FPExt16To64Idx. + // 46: FPExt: 16 to 64. <-- This must match FPExt16To64Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR16 - PMI_Min], 1}, - // 45: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. + // 48: FPExt: 32 to 64. <-- This must match FPExt32To64Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR32 - PMI_Min], 1}, - // 47: FPExt vector: 64 to 128. <-- This must match FPExt64To128Idx. + // 50: FPExt vector: 64 to 128. 
<-- This must match FPExt64To128Idx. {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR128 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1}, - // 49: Shift scalar with 64 bit shift imm + // 52: Shift scalar with 64 bit shift imm {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR32 - PMI_Min], 1}, {&AArch64GenRegisterBankInfo::PartMappings[PMI_GPR64 - PMI_Min], 1}, @@ -167,6 +173,8 @@ return 0; if (Size <= 64) return 1; + if (Size <= 128) + return 2; return -1; } if (RBIdx == PMI_FirstFPR) { Index: llvm/lib/Target/AArch64/AArch64InstrGISel.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -217,3 +217,8 @@ def : Pat<(atomic_cmp_swap_64 GPR64:$addr, GPR64:$desired, GPR64:$new), (CMP_SWAP_64 GPR64:$addr, GPR64:$desired, GPR64:$new)>; } + +def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr), + (STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; +def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr), + (STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>; Index: llvm/lib/Target/AArch64/AArch64RegisterBanks.td =================================================================== --- llvm/lib/Target/AArch64/AArch64RegisterBanks.td +++ llvm/lib/Target/AArch64/AArch64RegisterBanks.td @@ -10,7 +10,7 @@ //===----------------------------------------------------------------------===// /// General Purpose Registers: W, X. -def GPRRegBank : RegisterBank<"GPR", [GPR64all]>; +def GPRRegBank : RegisterBank<"GPR", [XSeqPairsClass]>; /// Floating Point/Vector Registers: B, H, S, D, Q. 
def FPRRegBank : RegisterBank<"FPR", [QQQQ]>; Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -488,6 +488,8 @@ if (Ty.getSizeInBits() == 64) return GetAllRegSet ? &AArch64::GPR64allRegClass : &AArch64::GPR64RegClass; + if (Ty.getSizeInBits() == 128) + return &AArch64::XSeqPairsClassRegClass; return nullptr; } @@ -520,6 +522,8 @@ if (SizeInBits == 64) return GetAllRegSet ? &AArch64::GPR64allRegClass : &AArch64::GPR64RegClass; + if (SizeInBits == 128) + return &AArch64::XSeqPairsClassRegClass; } if (RegBankID == AArch64::FPRRegBankID) { @@ -2531,8 +2535,20 @@ if (DstTy.getSizeInBits() != 64) return false; + unsigned Offset = I.getOperand(2).getImm(); + if (Offset % 64 != 0) + return false; + const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); + if (SrcRB.getID() == AArch64::GPRRegBankID && + DstRB.getID() == AArch64::GPRRegBankID) { + MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) + .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); + I.eraseFromParent(); + return true; + } + // Check we have the right regbank always. assert(SrcRB.getID() == AArch64::FPRRegBankID && DstRB.getID() == AArch64::FPRRegBankID && @@ -2541,9 +2557,6 @@ // Emit the same code as a vector extract. // Offset must be a multiple of 64. - unsigned Offset = I.getOperand(2).getImm(); - if (Offset % 64 != 0) - return false; unsigned LaneIdx = Offset / 64; MachineIRBuilder MIB(I); MachineInstr *Extract = emitExtractVectorElt( @@ -4992,6 +5005,15 @@ switch (IntrinID) { default: return false; + case Intrinsic::aarch64_ldxp: + case Intrinsic::aarch64_ldaxp: { + auto NewI = MIRBuilder.buildInstr( + IntrinID == Intrinsic::aarch64_ldxp ? 
AArch64::LDXPX : AArch64::LDAXPX, + {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, + {I.getOperand(3)}); + NewI.cloneMemRefs(I); + break; + } case Intrinsic::trap: MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1); break; Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -54,6 +54,8 @@ LegalizerHelper &Helper) const; bool legalizeCTPOP(MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const; + bool legalizeAtomicCmpxchg128(MachineInstr &MI, MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. Index: llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "AArch64LegalizerInfo.h" +#include "AArch64RegisterBankInfo.h" #include "AArch64Subtarget.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" @@ -498,14 +499,19 @@ getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG_WITH_SUCCESS) .lowerIf( - all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, s1), typeIs(2, p0))); + all(typeInSet(0, {s8, s16, s32, s64, s128}), typeIs(1, s1), typeIs(2, p0))); + + getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG) + .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))) + .customIf([](const LegalityQuery &Query) { + return Query.Types[0].getSizeInBits() == 128; + }); getActionDefinitionsBuilder( {G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR, G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, - G_ATOMICRMW_UMIN, 
G_ATOMICRMW_UMAX, G_ATOMIC_CMPXCHG}) - .legalIf(all( - typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); + G_ATOMICRMW_UMIN, G_ATOMICRMW_UMAX}) + .legalIf(all(typeInSet(0, {s8, s16, s32, s64}), typeIs(1, p0))); getActionDefinitionsBuilder(G_BLOCK_ADDR).legalFor({p0}); @@ -757,6 +763,8 @@ return legalizeRotate(MI, MRI, Helper); case TargetOpcode::G_CTPOP: return legalizeCTPOP(MI, MRI, Helper); + case TargetOpcode::G_ATOMIC_CMPXCHG: + return legalizeAtomicCmpxchg128(MI, MRI, Helper); } llvm_unreachable("expected switch to return"); @@ -1045,3 +1053,71 @@ MI.eraseFromParent(); return true; } + +bool AArch64LegalizerInfo::legalizeAtomicCmpxchg128(MachineInstr &MI, + MachineRegisterInfo &MRI, + LegalizerHelper &Helper) const { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + LLT s64 = LLT::scalar(64); + auto Addr = MI.getOperand(1).getReg(); + auto DesiredI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(2)); + auto NewI = MIRBuilder.buildUnmerge({s64, s64}, MI.getOperand(3)); + auto DstLo = MRI.createGenericVirtualRegister(s64); + auto DstHi = MRI.createGenericVirtualRegister(s64); + + MachineInstrBuilder CAS; + if (ST->hasLSE()) { + auto Ordering = (*MI.memoperands_begin())->getOrdering(); + unsigned Opcode; + switch(Ordering) { + case AtomicOrdering::Acquire: + Opcode = AArch64::CASPAX; + break; + case AtomicOrdering::Release: + Opcode = AArch64::CASPLX; + break; + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + Opcode = AArch64::CASPALX; + break; + default: + Opcode = AArch64::CASPX; + break; + } + + LLT s128 = LLT::scalar(128); + auto CASDst = MRI.createGenericVirtualRegister(s128); + auto CASDesired = MRI.createGenericVirtualRegister(s128); + auto CASNew = MRI.createGenericVirtualRegister(s128); + MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASDesired}, {}) + .addUse(DesiredI->getOperand(0).getReg()) + .addImm(AArch64::sube64) + .addUse(DesiredI->getOperand(1).getReg()) + .addImm(AArch64::subo64); + 
MIRBuilder.buildInstr(TargetOpcode::REG_SEQUENCE, {CASNew}, {}) + .addUse(NewI->getOperand(0).getReg()) + .addImm(AArch64::sube64) + .addUse(NewI->getOperand(1).getReg()) + .addImm(AArch64::subo64); + + CAS = MIRBuilder.buildInstr(Opcode, {CASDst}, {CASDesired, CASNew, Addr}); + + MIRBuilder.buildExtract({DstLo}, {CASDst}, 0); + MIRBuilder.buildExtract({DstHi}, {CASDst}, 64); + } else { + auto Scratch = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + CAS = MIRBuilder.buildInstr(AArch64::CMP_SWAP_128, {DstLo, DstHi, Scratch}, + {Addr, DesiredI->getOperand(0), + DesiredI->getOperand(1), NewI->getOperand(0), + NewI->getOperand(1)}); + } + + CAS.cloneMemRefs(MI); + constrainSelectedInstRegOperands(*CAS, *ST->getInstrInfo(), + *MRI.getTargetRegisterInfo(), + *ST->getRegBankInfo()); + + MIRBuilder.buildMerge(MI.getOperand(0), {DstLo, DstHi}); + MI.eraseFromParent(); + return true; +} Index: llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h +++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.h @@ -34,8 +34,9 @@ PMI_FPR512, PMI_GPR32, PMI_GPR64, + PMI_GPR128, PMI_FirstGPR = PMI_GPR32, - PMI_LastGPR = PMI_GPR64, + PMI_LastGPR = PMI_GPR128, PMI_FirstFPR = PMI_FPR16, PMI_LastFPR = PMI_FPR512, PMI_Min = PMI_FirstFPR, @@ -48,16 +49,16 @@ enum ValueMappingIdx { InvalidIdx = 0, First3OpsIdx = 1, - Last3OpsIdx = 22, + Last3OpsIdx = 25, DistanceBetweenRegBanks = 3, - FirstCrossRegCpyIdx = 25, - LastCrossRegCpyIdx = 39, + FirstCrossRegCpyIdx = 28, + LastCrossRegCpyIdx = 42, DistanceBetweenCrossRegCpy = 2, - FPExt16To32Idx = 41, - FPExt16To64Idx = 43, - FPExt32To64Idx = 45, - FPExt64To128Idx = 47, - Shift64Imm = 49 + FPExt16To32Idx = 44, + FPExt16To64Idx = 46, + FPExt32To64Idx = 48, + FPExt64To128Idx = 50, + Shift64Imm = 52, }; static bool checkPartialMap(unsigned Idx, unsigned ValStartIdx, Index: 
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp +++ llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp @@ -69,7 +69,7 @@ // GR64all + its subclasses. assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) && "Subclass not added?"); - assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit"); + assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit"); // The FPR register bank is fully defined by all the registers in // GR64all + its subclasses. @@ -87,7 +87,7 @@ // Check that the TableGen'ed like file is in sync we our expectations. // First, the Idx. assert(checkPartialMappingIdx(PMI_FirstGPR, PMI_LastGPR, - {PMI_GPR32, PMI_GPR64}) && + {PMI_GPR32, PMI_GPR64, PMI_GPR128}) && "PartialMappingIdx's are incorrectly ordered"); assert(checkPartialMappingIdx(PMI_FirstFPR, PMI_LastFPR, {PMI_FPR16, PMI_FPR32, PMI_FPR64, PMI_FPR128, @@ -104,6 +104,7 @@ CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR); CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR); + CHECK_PARTIALMAP(PMI_GPR128, 0, 128, RBGPR); CHECK_PARTIALMAP(PMI_FPR16, 0, 16, RBFPR); CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR); CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR); @@ -124,6 +125,7 @@ CHECK_VALUEMAP(GPR, 32); CHECK_VALUEMAP(GPR, 64); + CHECK_VALUEMAP(GPR, 128); CHECK_VALUEMAP(FPR, 16); CHECK_VALUEMAP(FPR, 32); CHECK_VALUEMAP(FPR, 64); @@ -142,6 +144,7 @@ CHECK_VALUEMAP_3OPS(GPR, 32); CHECK_VALUEMAP_3OPS(GPR, 64); + CHECK_VALUEMAP_3OPS(GPR, 128); CHECK_VALUEMAP_3OPS(FPR, 32); CHECK_VALUEMAP_3OPS(FPR, 64); CHECK_VALUEMAP_3OPS(FPR, 128); @@ -871,9 +874,13 @@ OpRegBankIdx[3] = PMI_FirstGPR; break; case TargetOpcode::G_EXTRACT: { - // For s128 sources we have to use fpr. + // For s128 sources we have to use fpr unless we know otherwise. 
+ auto Src = MI.getOperand(1).getReg(); LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); - if (SrcTy.getSizeInBits() == 128) { + if (MRI.getRegClassOrNull(Src) == &AArch64::XSeqPairsClassRegClass) { + OpRegBankIdx[0] = PMI_FirstGPR; + OpRegBankIdx[1] = PMI_FirstGPR; + } else if (SrcTy.getSizeInBits() == 128) { OpRegBankIdx[0] = PMI_FirstFPR; OpRegBankIdx[1] = PMI_FirstFPR; } Index: llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O1 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -mcpu=apple-a13 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O1 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-LLSC-O0 +; RUN: llc < %s -mtriple=arm64-linux-gnu -verify-machineinstrs -O0 -mcpu=apple-a13 -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=CHECK-CAS-O0 +@var = global i128 0 + +define void @val_compare_and_swap(i128* %p, i128 %oldval, i128 %newval) { +; CHECK-LLSC-O1-LABEL: val_compare_and_swap: +; CHECK-LLSC-O1: ldaxp {{x[0-9]+}}, {{x[0-9]+}}, [x0] +; [... LOTS of stuff that is generic IR unrelated to atomic operations ...] 
+; CHECK-LLSC-O1: stxp {{w[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}}, [x0] +; +; CHECK-CAS-O1-LABEL: val_compare_and_swap: +; CHECK-CAS-O1: caspa x2, x3, x4, x5, [x0] +; CHECK-CAS-O1: mov v[[OLD:[0-9]+]].d[0], x2 +; CHECK-CAS-O1: mov v[[OLD]].d[1], x3 +; CHECK-CAS-O1: str q[[OLD]], [x0] + +; CHECK-LLSC-O0-LABEL: val_compare_and_swap: +; CHECK-LLSC-O0: .LBB0_1: +; CHECK-LLSC-O0: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK-LLSC-O0: cmp [[OLD_LO]], x2 +; CHECK-LLSC-O0: cset [[EQUAL_TMP:w[0-9]+]], ne +; CHECK-LLSC-O0: cmp [[OLD_HI]], x3 +; CHECK-LLSC-O0: cinc [[EQUAL:w[0-9]+]], [[EQUAL_TMP]], ne +; CHECK-LLSC-O0: cbnz [[EQUAL]], .LBB0_3 +; CHECK-LLSC-O0: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0] +; CHECK-LLSC-O0: cbnz [[STATUS]], .LBB0_1 +; CHECK-LLSC-O0: .LBB0_3: +; CHECK-LLSC-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]] +; CHECK-LLSC-O0: mov v[[OLD]].d[1], [[OLD_HI]] +; CHECK-LLSC-O0: str q[[OLD]], [x0] + + +; CHECK-CAS-O0-LABEL: val_compare_and_swap: +; CHECK-CAS-O0: str x3, [sp, #[[SLOT:[0-9]+]]] +; CHECK-CAS-O0: mov [[NEW_HI_TMP:x[0-9]+]], x5 +; CHECK-CAS-O0: ldr [[DESIRED_HI_TMP:x[0-9]+]], [sp, #[[SLOT]]] +; CHECK-CAS-O0: mov [[DESIRED_HI:x[0-9]+]], [[DESIRED_HI_TMP]] +; CHECK-CAS-O0: mov [[NEW_HI:x[0-9]+]], [[NEW_HI_TMP]] +; CHECK-CAS-O0: caspa x2, [[DESIRED_HI]], x4, [[NEW_HI]], [x0] +; CHECK-CAS-O0: mov [[OLD_LO:x[0-9]+]], x2 +; CHECK-CAS-O0: mov [[OLD_HI:x[0-9]+]], x3 +; CHECK-CAS-O0: mov v[[OLD:[0-9]+]].d[0], [[OLD_LO]] +; CHECK-CAS-O0: mov v[[OLD]].d[1], [[OLD_HI]] +; CHECK-CAS-O0: str q[[OLD]], [x0] + +%pair = cmpxchg i128* %p, i128 %oldval, i128 %newval acquire acquire + %val = extractvalue { i128, i1 } %pair, 0 + store i128 %val, i128* %p + ret void +} Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -180,7 +180,6 @@ # 
DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMIC_CMPXCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_ATOMICRMW_XCHG (opcode {{[0-9]+}}): 2 type indices, 0 imm indices