Index: lib/Target/PowerPC/PPC.td =================================================================== --- lib/Target/PowerPC/PPC.td +++ lib/Target/PowerPC/PPC.td @@ -118,7 +118,9 @@ def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true", "Enable POWER8 vector instructions", [FeatureVSX, FeatureP8Altivec]>; - +def FeaturePartwordAtomic : SubtargetFeature<"partword-atomics", + "HasPartwordAtomics", "true", + "Enable l[bh]arx and st[bh]cx.">; def FeatureInvariantFunctionDescriptors : SubtargetFeature<"invariant-function-descriptors", "HasInvariantFunctionDescriptors", "true", @@ -263,7 +265,7 @@ FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto, Feature64Bit /*, Feature64BitRegs */, FeatureICBT, - DeprecatedMFTB, DeprecatedDST]; + FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]; } def : ProcessorModel<"970", G5Model, @@ -342,7 +344,7 @@ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, FeatureFPRND, FeatureFPCVT, FeatureISEL, FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, - Feature64Bit /*, Feature64BitRegs */, + Feature64Bit /*, Feature64BitRegs */, FeaturePartwordAtomic, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>; def : Processor<"ppc", G3Itineraries, [Directive32]>; Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -166,14 +166,6 @@ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. MFFS, - /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and - /// reserve indexed. This is used to implement atomic operations. - LARX, - - /// STCX = This corresponds to PPC stcx. instrcution: store conditional - /// indexed. This is used to implement atomic operations. - STCX, - /// TC_RETURN - A tail call return. /// operand #0 chain /// operand #1 callee (register or absolute) @@ -489,7 +481,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const override; MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, - MachineBasicBlock *MBB, bool is64Bit, + MachineBasicBlock *MBB, + unsigned AtomicSize, unsigned BinOpcode) const; MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -997,8 +997,6 @@ case PPCISD::STBRX: return "PPCISD::STBRX"; case PPCISD::LFIWAX: return "PPCISD::LFIWAX"; case PPCISD::LFIWZX: return "PPCISD::LFIWZX"; - case PPCISD::LARX: return "PPCISD::LARX"; - case PPCISD::STCX: return "PPCISD::STCX"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; case PPCISD::BDNZ: return "PPCISD::BDNZ"; case PPCISD::BDZ: return "PPCISD::BDZ"; @@ -7783,10 +7781,36 @@ MachineBasicBlock * PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, - bool is64bit, unsigned BinOpcode) const { + unsigned AtomicSize, + unsigned BinOpcode) const { // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch (AtomicSize) { + default: + llvm_unreachable("Unexpected size of atomic entity"); + case 1: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); + break; + case 2: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4"); + break; + case 4: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case 8: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } + const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); MachineFunction::iterator It = BB; @@ -7808,7 +7832,7 @@ MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : - RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass + RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass); // thisMBB: @@ -7823,11 +7847,11 @@ // bne- loopMBB // fallthrough --> exitMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(TmpReg).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); @@ -7845,6 +7869,10 @@ MachineBasicBlock *BB, bool is8bit, // operation unsigned BinOpcode) const { + // If we support part-word atomic mnemonics, just use them + if (Subtarget.hasPartwordAtomics()) + return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode); + // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. const TargetInstrInfo *TII = Subtarget.getInstrInfo(); // In 64 bit mode we have to use 64 bits for addresses, even though the @@ -8410,68 +8438,96 @@ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); + BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::AND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::AND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::OR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::OR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); + BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); + BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); + BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); + BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) BB = EmitPartwordAtomicBinary(MI, BB, false, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) - BB = EmitAtomicBinary(MI, BB, false, 0); + BB = EmitAtomicBinary(MI, BB, 4, 0); else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) - BB = EmitAtomicBinary(MI, BB, true, 0); + BB = EmitAtomicBinary(MI, BB, 8, 0); else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || - MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) || + (Subtarget.hasPartwordAtomics() && + MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) { bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; + auto LoadMnemonic = PPC::LDARX; + auto StoreMnemonic = PPC::STDCX; + switch(MI->getOpcode()) { + default: + llvm_unreachable("Compare and swap of unknown size"); + case PPC::ATOMIC_CMP_SWAP_I8: + LoadMnemonic = PPC::LBARX; + StoreMnemonic = PPC::STBCX; + assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I16: + LoadMnemonic = PPC::LHARX; + StoreMnemonic = PPC::STHCX; + assert(Subtarget.hasPartwordAtomics() && "No support partword atomics."); + break; + case PPC::ATOMIC_CMP_SWAP_I32: + LoadMnemonic = PPC::LWARX; + StoreMnemonic = PPC::STWCX; + break; + case PPC::ATOMIC_CMP_SWAP_I64: + LoadMnemonic = PPC::LDARX; + StoreMnemonic = PPC::STDCX; + break; + } unsigned dest = MI->getOperand(0).getReg(); unsigned ptrA = MI->getOperand(1).getReg(); unsigned ptrB = MI->getOperand(2).getReg(); @@ -8497,18 +8553,18 @@ BB->addSuccessor(loop1MBB); // loop1MBB: - // l[wd]arx dest, ptr + // l[bhwd]arx dest, ptr // cmp[wd] dest, oldval // bne- midMBB // loop2MBB: - // st[wd]cx. newval, ptr + // st[bhwd]cx. newval, ptr // bne- loopMBB // b exitBB // midMBB: - // st[wd]cx. dest, ptr + // st[bhwd]cx. dest, ptr // exitBB: BB = loop1MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) + BuildMI(BB, dl, TII->get(LoadMnemonic), dest) .addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) .addReg(oldval).addReg(dest); @@ -8518,7 +8574,7 @@ BB->addSuccessor(midMBB); BB = loop2MBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(newval).addReg(ptrA).addReg(ptrB); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); @@ -8527,7 +8583,7 @@ BB->addSuccessor(exitMBB); BB = midMBB; - BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) + BuildMI(BB, dl, TII->get(StoreMnemonic)) .addReg(dest).addReg(ptrA).addReg(ptrB); BB->addSuccessor(exitMBB); Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -235,14 +235,22 @@ } // Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", IIC_LdStLDARX, - [(set i64:$rD, (PPClarx xoaddr:$ptr))]>; + []>; + +// Instruction to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), + "ldarx $rD, $ptr, 1", IIC_LdStLDARX, + []>, isDOT; +} -let Defs = [CR0] in +let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), "stdcx. $rS, $dst", IIC_LdStSTDCX, - [(PPCstcx i64:$rS, xoaddr:$dst)]>, + []>, isDOT; let Interpretation64Bit = 1, isCodeGenOnly = 1 in { Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -46,13 +46,6 @@ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT> ]>; -def SDT_PPClarx : SDTypeProfile<1, 1, [ - SDTCisInt<0>, SDTCisPtrTy<1> -]>; -def SDT_PPCstcx : SDTypeProfile<0, 2, [ - SDTCisInt<0>, SDTCisPtrTy<1> -]>; - def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; @@ -225,12 +218,6 @@ def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// Instructions to support atomic operations -def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx, - [SDNPHasChain, SDNPMayLoad]>; -def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx, - [SDNPHasChain, SDNPMayStore]>; - // Instructions to support dynamic alloca. def SDTDynOp : SDTypeProfile<1, 2, []>; def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>; @@ -724,7 +711,7 @@ def IsE500 : Predicate<"PPCSubTarget->isE500()">; def HasSPE : Predicate<"PPCSubTarget->HasSPE()">; def HasICBT : Predicate<"PPCSubTarget->hasICBT()">; - +def HasPartwordAtomics : Predicate<"PPCSubTarget->hasPartwordAtomics()">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; def NaNsFPMath : Predicate<"!TM.Options.NoNaNsFPMath">; @@ -1455,15 +1442,54 @@ } // Instructions to support atomic operations +let mayLoad = 1, hasSideEffects = 0 in { +def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src", IIC_LdStLWARX, + []>, + Requires<[HasPartwordAtomics]>; + +def LHARX : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src", IIC_LdStLWARX, + []>, + Requires<[HasPartwordAtomics]>; + def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), "lwarx $rD, $src", IIC_LdStLWARX, - [(set i32:$rD, (PPClarx xoaddr:$src))]>; + []>; + +// Instructions to support lock versions of atomics +// (EH=1 - see Power ISA 2.07 Book II 4.4.2) +def LBARXL : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src), + "lbarx $rD, $src, 1", IIC_LdStLWARX, + []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LHARXL : XForm_1<31, 116, (outs gprc:$rD), (ins memrr:$src), + "lharx $rD, $src, 1", IIC_LdStLWARX, + []>, isDOT, + Requires<[HasPartwordAtomics]>; + +def LWARXL : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src), + "lwarx $rD, $src, 1", IIC_LdStLWARX, + []>, isDOT; +} + +let Defs = [CR0], mayStore = 1, hasSideEffects = 0 in { +def STBCX : XForm_1<31, 694, (outs), (ins gprc:$rS, memrr:$dst), + "stbcx. $rS, $dst", IIC_LdStSTWCX, + []>, + isDOT, Requires<[HasPartwordAtomics]>; + +def STHCX : XForm_1<31, 726, (outs), (ins gprc:$rS, memrr:$dst), + "sthcx. $rS, $dst", IIC_LdStSTWCX, + []>, + isDOT, Requires<[HasPartwordAtomics]>; -let Defs = [CR0] in def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst), "stwcx. $rS, $dst", IIC_LdStSTWCX, - [(PPCstcx i32:$rS, xoaddr:$dst)]>, + []>, isDOT; +} let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; Index: lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- lib/Target/PowerPC/PPCSubtarget.h +++ lib/Target/PowerPC/PPCSubtarget.h @@ -114,6 +114,7 @@ bool IsLittleEndian; bool HasICBT; bool HasInvariantFunctionDescriptors; + bool HasPartwordAtomics; /// When targeting QPX running a stock PPC64 Linux kernel where the stack /// alignment has not been changed, we need to keep the 16-byte alignment @@ -236,6 +237,7 @@ bool hasInvariantFunctionDescriptors() const { return HasInvariantFunctionDescriptors; } + bool hasPartwordAtomics() const { return HasPartwordAtomics; } bool isQPXStackUnaligned() const { return IsQPXStackUnaligned; } unsigned getPlatformStackAlignment() const { Index: lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- lib/Target/PowerPC/PPCSubtarget.cpp +++ lib/Target/PowerPC/PPCSubtarget.cpp @@ -95,6 +95,7 @@ HasLazyResolverStubs = false; HasICBT = false; HasInvariantFunctionDescriptors = false; + HasPartwordAtomics = false; IsQPXStackUnaligned = false; } Index: test/CodeGen/PowerPC/atomic-2.ll =================================================================== --- test/CodeGen/PowerPC/atomic-2.ll +++ test/CodeGen/PowerPC/atomic-2.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -march=ppc64 | FileCheck %s +; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s -check-prefix=CHECK-P7U +; RUN: llc < %s -march=ppc64 -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-P7U define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind { ; CHECK-LABEL: exchange_and_add: @@ -8,6 +10,22 @@ ret i64 %tmp } +define i8 @exchange_and_add8(i8* %mem, i8 %val) nounwind { +; CHECK-LABEL: exchange_and_add8: +; CHECK-P7U: lbarx + %tmp = atomicrmw add i8* %mem, i8 %val monotonic +; CHECK-P7U: stbcx. + ret i8 %tmp +} + +define i16 @exchange_and_add16(i16* %mem, i16 %val) nounwind { +; CHECK-LABEL: exchange_and_add16: +; CHECK-P7U: lharx + %tmp = atomicrmw add i16* %mem, i16 %val monotonic +; CHECK-P7U: sthcx. + ret i16 %tmp +} + define i64 @exchange_and_cmp(i64* %mem) nounwind { ; CHECK-LABEL: exchange_and_cmp: ; CHECK: ldarx @@ -18,6 +36,26 @@ ret i64 %tmp } +define i8 @exchange_and_cmp8(i8* %mem) nounwind { +; CHECK-LABEL: exchange_and_cmp8: +; CHECK-P7U: lbarx + %tmppair = cmpxchg i8* %mem, i8 0, i8 1 monotonic monotonic + %tmp = extractvalue { i8, i1 } %tmppair, 0 +; CHECK-P7U: stbcx. +; CHECK-P7U: stbcx. + ret i8 %tmp +} + +define i16 @exchange_and_cmp16(i16* %mem) nounwind { +; CHECK-LABEL: exchange_and_cmp16: +; CHECK-P7U: lharx + %tmppair = cmpxchg i16* %mem, i16 0, i16 1 monotonic monotonic + %tmp = extractvalue { i16, i1 } %tmppair, 0 +; CHECK-P7U: sthcx. +; CHECK-P7U: sthcx. + ret i16 %tmp +} + define i64 @exchange(i64* %mem, i64 %val) nounwind { ; CHECK-LABEL: exchange: ; CHECK: ldarx @@ -26,6 +64,22 @@ ret i64 %tmp } +define i8 @exchange8(i8* %mem, i8 %val) nounwind { +; CHECK-LABEL: exchange8: +; CHECK-P7U: lbarx + %tmp = atomicrmw xchg i8* %mem, i8 1 monotonic +; CHECK-P7U: stbcx. + ret i8 %tmp +} + +define i16 @exchange16(i16* %mem, i16 %val) nounwind { +; CHECK-LABEL: exchange16: +; CHECK-P7U: lharx + %tmp = atomicrmw xchg i16* %mem, i16 1 monotonic +; CHECK-P7U: sthcx. + ret i16 %tmp +} + define void @atomic_store(i64* %mem, i64 %val) nounwind { entry: ; CHECK: @atomic_store Index: test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt @@ -42,12 +42,30 @@ # CHECK: dcbf 2, 3 0x7c 0x02 0x18 0xac -# CHECK: lwarx 2, 3, 4 +# CHECK: lbarx 2, 3, 4 +0x7c 0x43 0x20 0x68 + +# CHECK: lharx 2, 3, 4 +0x7c 0x43 0x20 0xe8 + +# CHECK: lwarx 2, 3, 4 0x7c 0x43 0x20 0x28 -# CHECK: ldarx 2, 3, 4 +# CHECK: ldarx 2, 3, 4 0x7c 0x43 0x20 0xa8 +# CHECK: lbarx 2, 3, 4, 1 +0x7c 0x43 0x20 0x69 + +# CHECK: lharx 2, 3, 4, 1 +0x7c 0x43 0x20 0xe9 + +# CHECK: lwarx 2, 3, 4, 1 +0x7c 0x43 0x20 0x29 + +# CHECK: ldarx 2, 3, 4, 1 +0x7c 0x43 0x20 0xa9 + # CHECK: sync 0 0x7c 0x00 0x04 0xac Index: test/MC/PowerPC/ppc64-encoding-bookII.s =================================================================== --- test/MC/PowerPC/ppc64-encoding-bookII.s +++ test/MC/PowerPC/ppc64-encoding-bookII.s @@ -34,11 +34,6 @@ # CHECK-LE: isync # encoding: [0x2c,0x01,0x00,0x4c] isync -# FIXME: lbarx 2, 3, 4, 1 -# FIXME: lharx 2, 3, 4, 1 -# FIXME: lwarx 2, 3, 4, 1 -# FIXME: ldarx 2, 3, 4, 1 - # FIXME: stbcx. 2, 3, 4 # FIXME: sthcx. 2, 3, 4 # CHECK-BE: stwcx. 2, 3, 4 # encoding: [0x7c,0x43,0x21,0x2d] @@ -70,15 +65,38 @@ dcbf 2, 3 # FIXME: dcbfl 2, 3 -# FIXME: lbarx 2, 3, 4 -# FIXME: lharx 2, 3, 4 +# CHECK-BE: lbarx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0x68] +# CHECK-LE: lbarx 2, 3, 4 # encoding: [0x68,0x20,0x43,0x7c] + lbarx 2, 3, 4 + +# CHECK-BE: lharx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0xe8] +# CHECK-LE: lharx 2, 3, 4 # encoding: [0xe8,0x20,0x43,0x7c] + lharx 2, 3, 4 + # CHECK-BE: lwarx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0x28] # CHECK-LE: lwarx 2, 3, 4 # encoding: [0x28,0x20,0x43,0x7c] lwarx 2, 3, 4 + # CHECK-BE: ldarx 2, 3, 4 # encoding: [0x7c,0x43,0x20,0xa8] # CHECK-LE: ldarx 2, 3, 4 # encoding: [0xa8,0x20,0x43,0x7c] ldarx 2, 3, 4 +# CHECK-BE: lbarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0x69] +# CHECK-LE: lbarx 2, 3, 4, 1 # encoding: [0x69,0x20,0x43,0x7c] + lbarx 2, 3, 4, 1 + +# CHECK-BE: lharx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0xe9] +# CHECK-LE: lharx 2, 3, 4, 1 # encoding: [0xe9,0x20,0x43,0x7c] + lharx 2, 3, 4, 1 + +# CHECK-BE: lwarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0x29] +# CHECK-LE: lwarx 2, 3, 4, 1 # encoding: [0x29,0x20,0x43,0x7c] + lwarx 2, 3, 4, 1 + +# CHECK-BE: ldarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0xa9] +# CHECK-LE: ldarx 2, 3, 4, 1 # encoding: [0xa9,0x20,0x43,0x7c] + ldarx 2, 3, 4, 1 + # CHECK-BE: sync 0 # encoding: [0x7c,0x00,0x04,0xac] # CHECK-LE: sync 0 # encoding: [0xac,0x04,0x00,0x7c] sync