Index: llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -13,7 +13,9 @@
 #include "AArch64.h"
 #include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -84,6 +86,51 @@
       DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
       continue;
     }
+    if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
+      // XZ/WZ for LSE can only be used when acquire semantics are not used,
+      // LDOPAL WZ is an invalid opcode.
+      switch (MI.getOpcode()) {
+      case AArch64::CASALb:
+      case AArch64::CASALh:
+      case AArch64::CASALs:
+      case AArch64::CASALd:
+      case AArch64::SWPALb:
+      case AArch64::SWPALh:
+      case AArch64::SWPALs:
+      case AArch64::SWPALd:
+      case AArch64::LDADDALb:
+      case AArch64::LDADDALh:
+      case AArch64::LDADDALs:
+      case AArch64::LDADDALd:
+      case AArch64::LDEORALb:
+      case AArch64::LDEORALh:
+      case AArch64::LDEORALs:
+      case AArch64::LDEORALd:
+      case AArch64::LDSETALb:
+      case AArch64::LDSETALh:
+      case AArch64::LDSETALs:
+      case AArch64::LDSETALd:
+      case AArch64::LDSMINALb:
+      case AArch64::LDSMINALh:
+      case AArch64::LDSMINALs:
+      case AArch64::LDSMINALd:
+      case AArch64::LDSMAXALb:
+      case AArch64::LDSMAXALh:
+      case AArch64::LDSMAXALs:
+      case AArch64::LDSMAXALd:
+      case AArch64::LDUMINALb:
+      case AArch64::LDUMINALh:
+      case AArch64::LDUMINALs:
+      case AArch64::LDUMINALd:
+      case AArch64::LDUMAXALb:
+      case AArch64::LDUMAXALh:
+      case AArch64::LDUMAXALs:
+      case AArch64::LDUMAXALd:
+        continue;
+      default:
+        break;
+      }
+    }
     const MCInstrDesc &Desc = MI.getDesc();
     for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
       MachineOperand &MO = MI.getOperand(I);
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -201,7 +201,7 @@
   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
 
-  void SelectCMP_SWAP(SDNode *N);
+  bool SelectCMP_SWAP(SDNode *N);
 
 };
 } // end anonymous namespace
@@ -2609,9 +2609,13 @@
 }
 
 /// We've got special pseudo-instructions for these
-void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   unsigned Opcode;
   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+
+  // Leave IR for LSE if subtarget supports it.
+  if (Subtarget->hasLSE()) return false;
+
   if (MemTy == MVT::i8)
     Opcode = AArch64::CMP_SWAP_8;
   else if (MemTy == MVT::i16)
@@ -2637,6 +2641,8 @@
   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
   CurDAG->RemoveDeadNode(N);
+
+  return true;
 }
 
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
@@ -2660,8 +2666,9 @@
     break;
 
   case ISD::ATOMIC_CMP_SWAP:
-    SelectCMP_SWAP(Node);
-    return;
+    if (SelectCMP_SWAP(Node))
+      return;
+    break;
 
   case ISD::READ_REGISTER:
     if (tryReadRegister(Node))
Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10563,11 +10563,20 @@
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  return Size <= 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
+  if (Size > 128) return AtomicExpansionKind::None;
+  // Nand not supported in LSE.
+  if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
+  // Currently leaving And and Sub to LLSC
+  if ((AI->getOperation() == AtomicRMWInst::And) || (AI->getOperation() == AtomicRMWInst::Sub))
+    return AtomicExpansionKind::LLSC;
+  // Leave 128 bits to LLSC.
+  return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
 }
 
 bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
+  // If subtarget has LSE, leave cmpxchg intact for codegen.
+  if (Subtarget->hasLSE()) return false;
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
Index: llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
===================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -405,3 +405,49 @@
                           (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
                                GPR64:$newLo, GPR64:$newHi), []>,
                    Sched<[WriteAtomic]>;
+
+// v8.1 Atomic instructions:
+def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>;
+def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>;
+
+def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>;
+def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>;
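
Illustrative note (not part of the patch): with the hasLSE() checks above keeping atomic operations in IR and the new TableGen patterns selecting the AL-suffixed (acquire-release) LSE instructions, a plain sequentially consistent fetch-and-add is expected to compile to a single LDADDAL instead of an LDAXR/STLXR loop on an LSE-capable subtarget. The sketch below is a hedged example; the function name and build flags are illustrative assumptions.

// Sketch only: assuming a compiler containing this patch and a v8.1-A target,
// e.g. clang -O2 --target=aarch64-linux-gnu -march=armv8.1-a
#include <atomic>

int fetch_add_seq_cst(std::atomic<int> &Counter) {
  // The atomicrmw add i32 is left in IR (shouldExpandAtomicRMWInIR returns
  // None for Add when the subtarget has LSE), reaches SelectionDAG as
  // atomic_load_add_32, and is expected to match the LDADDALs pattern above.
  return Counter.fetch_add(1, std::memory_order_seq_cst);
}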