Index: lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
===================================================================
--- lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -55,6 +55,8 @@
     AU.setPreservesCFG();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
+
+  bool ShouldSkip(const MachineInstr &MI, const MachineFunction &MF) const;
 };
 char AArch64DeadRegisterDefinitions::ID = 0;
 } // end anonymous namespace
@@ -69,6 +71,74 @@
   return false;
 }
 
+/// Decide whether the dead-definition rewrite must leave \p MI untouched.
+///
+/// ARM Errata D11904 05/12/2017:
+///   The Acquire semantics are present when (A == 1 && Rt != 11111).
+///   The Release semantics are present when (R == 1).
+/// Rewriting the dead result of an acquiring LSE atomic to WZR/XZR would
+/// therefore drop its acquire ordering, so such instructions are skipped.
+///
+/// \returns true if \p MI is an LSE atomic with acquire semantics that has a
+/// register def whose register class contains WZR or XZR.
+bool AArch64DeadRegisterDefinitions::ShouldSkip(
+    const MachineInstr &MI, const MachineFunction &MF) const {
+  if (!MF.getSubtarget<AArch64Subtarget>().hasLSE())
+    return false;
+
+#define CASE_AARCH64_ATOMIC_(PREFIX) \
+  case AArch64::PREFIX##X: \
+  case AArch64::PREFIX##W: \
+  case AArch64::PREFIX##H: \
+  case AArch64::PREFIX##B
+
+  // Classify on the opcode alone. Memory operands may be dropped from a
+  // MachineInstr, so a missing or non-atomic MachineMemOperand must not be
+  // read as "no acquire semantics".
+  switch (MI.getOpcode()) {
+  default:
+    return false;
+
+  CASE_AARCH64_ATOMIC_(LDADDA):
+  CASE_AARCH64_ATOMIC_(LDADDAL):
+  CASE_AARCH64_ATOMIC_(LDCLRA):
+  CASE_AARCH64_ATOMIC_(LDCLRAL):
+  CASE_AARCH64_ATOMIC_(LDEORA):
+  CASE_AARCH64_ATOMIC_(LDEORAL):
+  CASE_AARCH64_ATOMIC_(LDSETA):
+  CASE_AARCH64_ATOMIC_(LDSETAL):
+  CASE_AARCH64_ATOMIC_(LDSMAXA):
+  CASE_AARCH64_ATOMIC_(LDSMAXAL):
+  CASE_AARCH64_ATOMIC_(LDSMINA):
+  CASE_AARCH64_ATOMIC_(LDSMINAL):
+  CASE_AARCH64_ATOMIC_(LDUMAXA):
+  CASE_AARCH64_ATOMIC_(LDUMAXAL):
+  CASE_AARCH64_ATOMIC_(LDUMINA):
+  CASE_AARCH64_ATOMIC_(LDUMINAL):
+  CASE_AARCH64_ATOMIC_(CASA):
+  CASE_AARCH64_ATOMIC_(CASAL):
+  CASE_AARCH64_ATOMIC_(SWPA):
+  CASE_AARCH64_ATOMIC_(SWPAL):
+    break;
+  }
+
+#undef CASE_AARCH64_ATOMIC_
+
+  const MCInstrDesc &Desc = MI.getDesc();
+  for (unsigned I = 0, E = Desc.getNumDefs(); I != E; ++I) {
+    const MachineOperand &MO = MI.getOperand(I);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+
+    // Only defs that could be rewritten to a zero register are of interest.
+    const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+    if (RC && (RC->contains(AArch64::WZR) || RC->contains(AArch64::XZR)))
+      return true;
+  }
+
+  return false;
+}
+
 void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
     MachineBasicBlock &MBB) {
   const MachineFunction &MF = *MBB.getParent();
@@ -86,51 +156,16 @@
       DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
       continue;
     }
-    if (MF.getSubtarget<AArch64Subtarget>().hasLSE()) {
-      // XZ/WZ for LSE can only be used when acquire semantics are not used,
-      // LDOPAL WZ is an invalid opcode.
-      switch (MI.getOpcode()) {
-      case AArch64::CASALb:
-      case AArch64::CASALh:
-      case AArch64::CASALs:
-      case AArch64::CASALd:
-      case AArch64::SWPALb:
-      case AArch64::SWPALh:
-      case AArch64::SWPALs:
-      case AArch64::SWPALd:
-      case AArch64::LDADDALb:
-      case AArch64::LDADDALh:
-      case AArch64::LDADDALs:
-      case AArch64::LDADDALd:
-      case AArch64::LDEORALb:
-      case AArch64::LDEORALh:
-      case AArch64::LDEORALs:
-      case AArch64::LDEORALd:
-      case AArch64::LDSETALb:
-      case AArch64::LDSETALh:
-      case AArch64::LDSETALs:
-      case AArch64::LDSETALd:
-      case AArch64::LDSMINALb:
-      case AArch64::LDSMINALh:
-      case AArch64::LDSMINALs:
-      case AArch64::LDSMINALd:
-      case AArch64::LDSMAXALb:
-      case AArch64::LDSMAXALh:
-      case AArch64::LDSMAXALs:
-      case AArch64::LDSMAXALd:
-      case AArch64::LDUMINALb:
-      case AArch64::LDUMINALh:
-      case AArch64::LDUMINALs:
-      case AArch64::LDUMINALd:
-      case AArch64::LDUMAXALb:
-      case AArch64::LDUMAXALh:
-      case AArch64::LDUMAXALs:
-      case AArch64::LDUMAXALd:
-        continue;
-      default:
-        break;
-      }
+
+    // ARM Errata D11904 05/12/2017:
+    // The Acquire semantics are present when (A == 1 && Rt != 11111).
+    // The Release semantics are present when (R == 1).
+    // This prohibits the use of WZR/XZR with LSE and Acquire semantics.
+ if (ShouldSkip(MI, MF)) { + DEBUG(dbgs() << " Ignoring, Atomic instruction with acquire semantics using WZR/XZR\n"); + continue; } + const MCInstrDesc &Desc = MI.getDesc(); for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) { MachineOperand &MO = MI.getOperand(I); Index: lib/Target/AArch64/AArch64InstrAtomics.td =================================================================== --- lib/Target/AArch64/AArch64InstrAtomics.td +++ lib/Target/AArch64/AArch64InstrAtomics.td @@ -407,47 +407,47 @@ Sched<[WriteAtomic]>; // v8.1 Atomic instructions: -def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALd GPR64:$Rs, GPR64sp:$Rn)>; - -def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALd GPR64:$Rs, GPR64sp:$Rn)>; - -def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALd GPR64:$Rs, GPR64sp:$Rn)>; - -def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALd GPR64:$Rs, GPR64sp:$Rn)>; - -def 
: Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALd GPR64:$Rs, GPR64sp:$Rn)>; - -def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALd GPR64:$Rs, GPR64sp:$Rn)>; - -def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALd GPR64:$Rs, GPR64sp:$Rn)>; - -def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALb GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; -def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALh GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; -def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALs GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; -def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALd GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>; - -def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALb GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALh GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALs GPR32:$Rs, GPR64sp:$Rn)>; -def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALd GPR64:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_8 GPR64:$Rn, GPR32:$Rs), (LDADDALB GPR32:$Rs, GPR64sp:$Rn)>; +def 
: Pat<(atomic_load_add_16 GPR64:$Rn, GPR32:$Rs), (LDADDALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_32 GPR64:$Rn, GPR32:$Rs), (LDADDALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_add_64 GPR64:$Rn, GPR64:$Rs), (LDADDALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_or_8 GPR64:$Rn, GPR32:$Rs), (LDSETALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_16 GPR64:$Rn, GPR32:$Rs), (LDSETALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_32 GPR64:$Rn, GPR32:$Rs), (LDSETALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_or_64 GPR64:$Rn, GPR64:$Rs), (LDSETALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_xor_8 GPR64:$Rn, GPR32:$Rs), (LDEORALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_16 GPR64:$Rn, GPR32:$Rs), (LDEORALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_32 GPR64:$Rn, GPR32:$Rs), (LDEORALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_xor_64 GPR64:$Rn, GPR64:$Rs), (LDEORALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_max_8 GPR64:$Rn, GPR32:$Rs), (LDSMAXALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_16 GPR64:$Rn, GPR32:$Rs), (LDSMAXALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_32 GPR64:$Rn, GPR32:$Rs), (LDSMAXALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_max_64 GPR64:$Rn, GPR64:$Rs), (LDSMAXALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umax_8 GPR64:$Rn, GPR32:$Rs), (LDUMAXALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_16 GPR64:$Rn, GPR32:$Rs), (LDUMAXALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_32 GPR64:$Rn, GPR32:$Rs), (LDUMAXALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umax_64 GPR64:$Rn, GPR64:$Rs), (LDUMAXALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_min_8 GPR64:$Rn, GPR32:$Rs), (LDSMINALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_16 GPR64:$Rn, GPR32:$Rs), (LDSMINALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_min_32 GPR64:$Rn, GPR32:$Rs), (LDSMINALW GPR32:$Rs, GPR64sp:$Rn)>; +def : 
Pat<(atomic_load_min_64 GPR64:$Rn, GPR64:$Rs), (LDSMINALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_load_umin_8 GPR64:$Rn, GPR32:$Rs), (LDUMINALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_16 GPR64:$Rn, GPR32:$Rs), (LDUMINALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_32 GPR64:$Rn, GPR32:$Rs), (LDUMINALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_load_umin_64 GPR64:$Rn, GPR64:$Rs), (LDUMINALX GPR64:$Rs, GPR64sp:$Rn)>; + +def : Pat<(atomic_cmp_swap_8 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALB GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_16 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALH GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_32 GPR64:$Rn, GPR32:$Rold, GPR32:$Rnew), (CASALW GPR32:$Rold, GPR32:$Rnew, GPR64sp:$Rn)>; +def : Pat<(atomic_cmp_swap_64 GPR64:$Rn, GPR64:$Rold, GPR64:$Rnew), (CASALX GPR64:$Rold, GPR64:$Rnew, GPR64sp:$Rn)>; + +def : Pat<(atomic_swap_8 GPR64:$Rn, GPR32:$Rs), (SWPALB GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_16 GPR64:$Rn, GPR32:$Rs), (SWPALH GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_32 GPR64:$Rn, GPR32:$Rs), (SWPALW GPR32:$Rs, GPR64sp:$Rn)>; +def : Pat<(atomic_swap_64 GPR64:$Rn, GPR64:$Rs), (SWPALX GPR64:$Rs, GPR64sp:$Rn)>; Index: lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- lib/Target/AArch64/AArch64InstrFormats.td +++ lib/Target/AArch64/AArch64InstrFormats.td @@ -9365,10 +9365,11 @@ // ST{}[] , [] // ST{} , [] -let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +let Predicates = [HasLSE], + mayLoad = 1, mayStore = 1, hasSideEffects = 1 in class BaseCASEncoding pattern> - : I { + : I { bits<2> Sz; bit NP; bit Acq; @@ -9389,41 +9390,39 @@ let Predicates = [HasLSE]; } -class BaseCAS - : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), - "cas" # order # size, "\t$Rs, $Rt, [$Rn]", - "$out = $Rs",[]>, - Sched<[WriteAtomic]> { +let Predicates 
= [HasLSE], mayLoad = 1, mayStore = 1, + hasSideEffects = 1 in +class BaseCAS sz, bits<1> acq, bits<1> rel, string order, + string size, RegisterClass RC> + : BaseCASEncoding<(outs RC:$out), (ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "cas" # order # size, "\t$Rs, $Rt, [$Rn]", "$out = $Rs", + []>, Sched<[WriteAtomic, WriteLD, WriteST]> { + let Sz = sz; + let Acq = acq; + let Rel = rel; let NP = 1; } -multiclass CompareAndSwap Acq, bits<1> Rel, string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseCAS; - let Sz = 0b01, Acq = Acq, Rel = Rel in def h : BaseCAS; - let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseCAS; - let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseCAS; -} - -class BaseCASP - : BaseCASEncoding<(outs RC:$out),(ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), - "casp" # order # size, "\t$Rs, $Rt, [$Rn]", - "$out = $Rs",[]>, - Sched<[WriteAtomic]> { +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, + hasSideEffects = 1 in +class BaseCASP sz, bits<1> acq, bits<1> rel, string order, + string size, RegisterOperand RC> + : BaseCASEncoding<(outs RC:$out), + (ins RC:$Rs, RC:$Rt, GPR64sp:$Rn), + "casp" # order # size, "\t$Rs, $Rt, [$Rn]", + "$out = $Rs", + []>, Sched<[WriteAtomic, WriteLD, WriteST]> { + let Sz = sz; + let Acq = acq; + let Rel = rel; let NP = 0; } -multiclass CompareAndSwapPair Acq, bits<1> Rel, string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel in - def s : BaseCASP; - let Sz = 0b01, Acq = Acq, Rel = Rel in - def d : BaseCASP; -} - -let Predicates = [HasLSE] in -class BaseSWP - : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size, - "\t$Rs, $Rt, [$Rn]","",[]>, - Sched<[WriteAtomic]> { +let Predicates = [HasLSE], + mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseSWPEncoding pattern> + : I { bits<2> Sz; bit Acq; bit Rel; @@ -9445,23 +9444,29 @@ let Predicates = [HasLSE]; } -multiclass Swap Acq, bits<1> Rel, string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel in def b : BaseSWP; - let Sz = 0b01, Acq = Acq, Rel = 
Rel in def h : BaseSWP; - let Sz = 0b10, Acq = Acq, Rel = Rel in def s : BaseSWP; - let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP; +let Predicates = [HasLSE], mayStore = 1, hasSideEffects = 1 in +class BaseSWP sz, bit acq, bit rel, string order, + string size, RegisterClass RC> + : BaseSWPEncoding<(outs RC:$Rt), + (ins RC:$Rs, GPR64sp:$Rn), + "swp" # order # size, + "\t$Rs, $Rt, [$Rn]", "", + []>, Sched<[WriteAtomic, WriteLD, WriteST]> { + let Sz = sz; + let Acq = acq; + let Rel = rel; + let Constraints = "@earlyclobber $Rt"; } let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in -class BaseLDOPregister - : I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size, - "\t$Rs, $Rt, [$Rn]","",[]>, - Sched<[WriteAtomic]> { +class BaseLDOPEncoding pattern> + : I { bits<2> Sz; bit Acq; bit Rel; bits<5> Rs; - bits<3> opc; + bits<3> Opc; bits<5> Rn; bits<5> Rt; let Inst{31-30} = Sz; @@ -9471,49 +9476,124 @@ let Inst{21} = 0b1; let Inst{20-16} = Rs; let Inst{15} = 0b0; - let Inst{14-12} = opc; + let Inst{14-12} = Opc; let Inst{11-10} = 0b00; let Inst{9-5} = Rn; let Inst{4-0} = Rt; let Predicates = [HasLSE]; } -multiclass LDOPregister opc, string op, bits<1> Acq, bits<1> Rel, - string order> { - let Sz = 0b00, Acq = Acq, Rel = Rel, opc = opc in - def b : BaseLDOPregister; - let Sz = 0b01, Acq = Acq, Rel = Rel, opc = opc in - def h : BaseLDOPregister; - let Sz = 0b10, Acq = Acq, Rel = Rel, opc = opc in - def s : BaseLDOPregister; - let Sz = 0b11, Acq = Acq, Rel = Rel, opc = opc in - def d : BaseLDOPregister; -} - -let Predicates = [HasLSE] in -class BaseSTOPregister : - InstAlias; - -multiclass STOPregister { - def : BaseSTOPregister(instr # "Lb")>; - def : BaseSTOPregister(instr # "Lh")>; - def : BaseSTOPregister(instr # "Ls")>; - def : BaseSTOPregister(instr # "Ld")>; - def : BaseSTOPregister(instr # "b")>; - def : BaseSTOPregister(instr # "h")>; - def : BaseSTOPregister(instr # "s")>; - def : BaseSTOPregister(instr # "d")>; +let 
Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseLDOP sz, bit acq, bit rel, bits<3> opc, + string op, string order, string size, + RegisterClass RC> + : BaseLDOPEncoding<(outs RC:$Rt), + (ins RC:$Rs, GPR64sp:$Rn), + "ld" # op # order # size, + "\t$Rs, $Rt, [$Rn]", "", + []>, Sched<[WriteAtomic, WriteLD, WriteST]> { + let Sz = sz; + let Acq = acq; + let Rel = rel; + let Opc = opc; } +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseSTOPEncoding pattern> + : I { + bits<2> Sz; + bit Acq; + bit Rel; + bits<5> Rs; + bits<3> Opc; + bits<5> Rn; + bits<5> Rt; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b111000; + let Inst{23} = Acq; + let Inst{22} = Rel; + let Inst{21} = 0b1; + let Inst{20-16} = Rs; + let Inst{15} = 0b0; + let Inst{14-12} = Opc; + let Inst{11-10} = 0b00; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseSTOP sz, bits<1> acq, bits<1> rel, bits<3> opc, + bits<5> rt, string op, string order, string size, + RegisterClass RC> + : BaseSTOPEncoding<(outs), + (ins RC:$Rs, GPR64sp:$Rn), + "st" # op # order # size, + "\t$Rs, [$Rn]", "", + []>, Sched<[WriteAtomic, WriteLD, WriteST]> { + let Sz = sz; + let Acq = acq; + let Rel = rel; + let Opc = opc; + let Rt = rt; +} + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseSTLOEncoding pattern> + : I { + bits<2> Sz; + bit Lo; + bit Oo; + bits<5> Rs; + bits<5> Rn; + bits<5> Rt; + bits<5> Rt2; + let Inst{31-30} = Sz; + let Inst{29-24} = 0b001000; + let Inst{23} = 0b1; + let Inst{22} = Lo; + let Inst{21} = 0b0; + let Inst{20-16} = Rs; + let Inst{15} = Oo; + let Inst{14-10} = Rt2; + let Inst{9-5} = Rn; + let Inst{4-0} = Rt; + let Predicates = [HasLSE]; +} + +let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +class BaseSTLO sz, bits<1> lo, bits<1> oo, bits<5> rs, + bits<5> rt2, 
string order, + RegisterClass RC> + : BaseSTLOEncoding<(outs), (ins RC:$Rs, GPR64sp:$Rn), + "stllr" # order, + "\t$Rs [$Rn]", "", + []>, Sched<[WriteAtomic, WriteLD, WriteST]> { + let Sz = sz; + let Lo = lo; + let Oo = oo; + let Rs = rs; + let Rt2 = rt2; +} + +class BaseNOPEncoding pattern> + : I { + let Inst{31-22} = 0b1101010100; + let Inst{21} = 0b0; + let Inst{20-19} = 0b00; + let Inst{18-16} = 0b011; + let Inst{15-12} = 0b0010; + let Inst{11-8} = 0b0000; + let Inst{7-5} = 0b000; + let Inst{4-0} = 0b11111; +} + +class BaseNOP : BaseNOPEncoding<(outs), (ins), "nop", "", "", []>, + Sched<[]> { } + //---------------------------------------------------------------------------- // Allow the size specifier tokens to be upper case, not just lower. def : TokenAlias<".8B", ".8b">; Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -309,13 +309,11 @@ def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; //===----------------------------------------------------------------------===// - //===----------------------------------------------------------------------===// // AArch64 Instruction Predicate Definitions. def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">; def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">; - // We could compute these on a per-module basis but doing so requires accessing // the Function object through the Subtarget and objections were raised // to that (see post-commit review comments for r301750). 
@@ -829,74 +827,502 @@ def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">; def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; +// NOP +def NOP : BaseNOP; + // v8.1 atomic CAS -defm CAS : CompareAndSwap<0, 0, "">; -defm CASA : CompareAndSwap<1, 0, "a">; -defm CASL : CompareAndSwap<0, 1, "l">; -defm CASAL : CompareAndSwap<1, 1, "al">; +let AddedComplexity = 5, Predicates = [HasLSE] in { + def CASB : BaseCAS<0b00, 0, 0, "", "b", GPR32>; + def CASH : BaseCAS<0b01, 0, 0, "", "h", GPR32>; + def CASW : BaseCAS<0b10, 0, 0, "", "", GPR32>; + def CASX : BaseCAS<0b11, 0, 0, "", "", GPR64>; + + def CASAB : BaseCAS<0b00, 1, 0, "a", "b", GPR32>; + def CASAH : BaseCAS<0b01, 1, 0, "a", "h", GPR32>; + def CASAW : BaseCAS<0b10, 1, 0, "a", "", GPR32>; + def CASAX : BaseCAS<0b11, 1, 0, "a", "", GPR64>; + + def CASLB : BaseCAS<0b00, 0, 1, "l", "b", GPR32>; + def CASLH : BaseCAS<0b01, 0, 1, "l", "h", GPR32>; + def CASLW : BaseCAS<0b10, 0, 1, "l", "", GPR32>; + def CASLX : BaseCAS<0b11, 0, 1, "l", "", GPR64>; + + def CASALB : BaseCAS<0b00, 1, 1, "al", "b", GPR32>; + def CASALH : BaseCAS<0b01, 1, 1, "al", "h", GPR32>; + def CASALW : BaseCAS<0b10, 1, 1, "al", "", GPR32>; + def CASALX : BaseCAS<0b11, 1, 1, "al", "", GPR64>; +} // v8.1 atomic CASP -defm CASP : CompareAndSwapPair<0, 0, "">; -defm CASPA : CompareAndSwapPair<1, 0, "a">; -defm CASPL : CompareAndSwapPair<0, 1, "l">; -defm CASPAL : CompareAndSwapPair<1, 1, "al">; +let AddedComplexity = 5, Predicates = [HasLSE] in { + def CASPW : BaseCASP<0b00, 0, 0, "", "", WSeqPairClassOperand>; + def CASPAW : BaseCASP<0b00, 1, 0, "a", "", WSeqPairClassOperand>; + def CASPLW : BaseCASP<0b00, 0, 1, "l", "", WSeqPairClassOperand>; + def CASPALW : BaseCASP<0b00, 1, 1, "al", "", WSeqPairClassOperand>; + + def CASPX : BaseCASP<0b01, 0, 0, "", "", XSeqPairClassOperand>; + def CASPAX : BaseCASP<0b01, 1, 0, "a", "", XSeqPairClassOperand>; + def CASPLX : BaseCASP<0b01, 0, 1, "l", "", 
XSeqPairClassOperand>; + def CASPALX : BaseCASP<0b01, 1, 1, "al", "", XSeqPairClassOperand>; + } + // v8.1 atomic SWP -defm SWP : Swap<0, 0, "">; -defm SWPA : Swap<1, 0, "a">; -defm SWPL : Swap<0, 1, "l">; -defm SWPAL : Swap<1, 1, "al">; - -// v8.1 atomic LD(register). Performs load and then ST(register) -defm LDADD : LDOPregister<0b000, "add", 0, 0, "">; -defm LDADDA : LDOPregister<0b000, "add", 1, 0, "a">; -defm LDADDL : LDOPregister<0b000, "add", 0, 1, "l">; -defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">; - -defm LDCLR : LDOPregister<0b001, "clr", 0, 0, "">; -defm LDCLRA : LDOPregister<0b001, "clr", 1, 0, "a">; -defm LDCLRL : LDOPregister<0b001, "clr", 0, 1, "l">; -defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">; - -defm LDEOR : LDOPregister<0b010, "eor", 0, 0, "">; -defm LDEORA : LDOPregister<0b010, "eor", 1, 0, "a">; -defm LDEORL : LDOPregister<0b010, "eor", 0, 1, "l">; -defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">; - -defm LDSET : LDOPregister<0b011, "set", 0, 0, "">; -defm LDSETA : LDOPregister<0b011, "set", 1, 0, "a">; -defm LDSETL : LDOPregister<0b011, "set", 0, 1, "l">; -defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">; - -defm LDSMAX : LDOPregister<0b100, "smax", 0, 0, "">; -defm LDSMAXA : LDOPregister<0b100, "smax", 1, 0, "a">; -defm LDSMAXL : LDOPregister<0b100, "smax", 0, 1, "l">; -defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">; - -defm LDSMIN : LDOPregister<0b101, "smin", 0, 0, "">; -defm LDSMINA : LDOPregister<0b101, "smin", 1, 0, "a">; -defm LDSMINL : LDOPregister<0b101, "smin", 0, 1, "l">; -defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">; - -defm LDUMAX : LDOPregister<0b110, "umax", 0, 0, "">; -defm LDUMAXA : LDOPregister<0b110, "umax", 1, 0, "a">; -defm LDUMAXL : LDOPregister<0b110, "umax", 0, 1, "l">; -defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">; - -defm LDUMIN : LDOPregister<0b111, "umin", 0, 0, "">; -defm LDUMINA : LDOPregister<0b111, "umin", 1, 0, "a">; -defm LDUMINL : 
LDOPregister<0b111, "umin", 0, 1, "l">; -defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">; - -// v8.1 atomic ST(register) as aliases to "LD(register) when Rt=xZR" -defm : STOPregister<"stadd","LDADD">; // STADDx -defm : STOPregister<"stclr","LDCLR">; // STCLRx -defm : STOPregister<"steor","LDEOR">; // STEORx -defm : STOPregister<"stset","LDSET">; // STSETx -defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx -defm : STOPregister<"stsmin","LDSMIN">;// STSMINx -defm : STOPregister<"stumax","LDUMAX">;// STUMAXx -defm : STOPregister<"stumin","LDUMIN">;// STUMINx +let AddedComplexity = 5, Predicates = [HasLSE] in { + def SWPB : BaseSWP<0b00, 0, 0, "", "b", GPR32>; + def SWPH : BaseSWP<0b01, 0, 0, "", "h", GPR32>; + def SWPW : BaseSWP<0b10, 0, 0, "", "", GPR32>; + def SWPX : BaseSWP<0b11, 0, 0, "", "", GPR64>; + def SWPAB : BaseSWP<0b00, 1, 0, "a", "b", GPR32>; + def SWPAH : BaseSWP<0b01, 1, 0, "a", "h", GPR32>; + def SWPAW : BaseSWP<0b10, 1, 0, "a", "", GPR32>; + def SWPAX : BaseSWP<0b11, 1, 0, "a", "", GPR64>; + def SWPLB : BaseSWP<0b00, 0, 1, "l", "b", GPR32>; + def SWPLH : BaseSWP<0b01, 0, 1, "l", "h", GPR32>; + def SWPLW : BaseSWP<0b10, 0, 1, "l", "", GPR32>; + def SWPLX : BaseSWP<0b11, 0, 1, "l", "", GPR64>; + def SWPALB : BaseSWP<0b00, 1, 1, "al", "b", GPR32>; + def SWPALH : BaseSWP<0b01, 1, 1, "al", "h", GPR32>; + def SWPALW : BaseSWP<0b10, 1, 1, "al", "", GPR32>; + def SWPALX : BaseSWP<0b11, 1, 1, "al", "", GPR64>; +} + +let AddedComplexity = 5, Predicates = [HasLSE] in { + def LDADDB : BaseLDOP<0b00, 0, 0, 0b000, "add", "", "b", + GPR32>; + def LDADDH : BaseLDOP<0b01, 0, 0, 0b000, "add", "", "h", + GPR32>; + def LDADDW : BaseLDOP<0b10, 0, 0, 0b000, "add", "", "", + GPR32>; + def LDADDX : BaseLDOP<0b11, 0, 0, 0b000, "add", "", "", + GPR64>; + + def LDADDAB : BaseLDOP<0b00, 1, 0, 0b000, "add", "a", "b", + GPR32>; + def LDADDAH : BaseLDOP<0b01, 1, 0, 0b000, "add", "a", "h", + GPR32>; + def LDADDAW : BaseLDOP<0b10, 1, 0, 0b000, "add", "a", "", + GPR32>; + def 
LDADDAX : BaseLDOP<0b11, 1, 0, 0b000, "add", "a", "", + GPR64>; + + def LDADDLB : BaseLDOP<0b00, 0, 1, 0b000, "add", "l", "b", + GPR32>; + def LDADDLH : BaseLDOP<0b01, 0, 1, 0b000, "add", "l", "h", + GPR32>; + def LDADDLW : BaseLDOP<0b10, 0, 1, 0b000, "add", "l", "", + GPR32>; + def LDADDLX : BaseLDOP<0b11, 0, 1, 0b000, "add", "l", "", + GPR64>; + + def LDADDALB : BaseLDOP<0b00, 1, 1, 0b000, "add", "al", "b", + GPR32>; + def LDADDALH : BaseLDOP<0b01, 1, 1, 0b000, "add", "al", "h", + GPR32>; + def LDADDALW : BaseLDOP<0b10, 1, 1, 0b000, "add", "al", "", + GPR32>; + def LDADDALX : BaseLDOP<0b11, 1, 1, 0b000, "add", "al", "", + GPR64>; + + def LDCLRB : BaseLDOP<0b00, 0, 0, 0b001, "clr", "", "b", + GPR32>; + def LDCLRH : BaseLDOP<0b01, 0, 0, 0b001, "clr", "", "h", + GPR32>; + def LDCLRW : BaseLDOP<0b10, 0, 0, 0b001, "clr", "", "", + GPR32>; + def LDCLRX : BaseLDOP<0b11, 0, 0, 0b001, "clr", "", "", + GPR64>; + + def LDCLRAB : BaseLDOP<0b00, 1, 0, 0b001, "clr", "a", "b", + GPR32>; + def LDCLRAH : BaseLDOP<0b01, 1, 0, 0b001, "clr", "a", "h", + GPR32>; + def LDCLRAW : BaseLDOP<0b10, 1, 0, 0b001, "clr", "a", "", + GPR32>; + def LDCLRAX : BaseLDOP<0b11, 1, 0, 0b001, "clr", "a", "", + GPR64>; + + def LDCLRLB : BaseLDOP<0b00, 0, 1, 0b001, "clr", "l", "b", + GPR32>; + def LDCLRLH : BaseLDOP<0b01, 0, 1, 0b001, "clr", "l", "h", + GPR32>; + def LDCLRLW : BaseLDOP<0b10, 0, 1, 0b001, "clr", "l", "", + GPR32>; + def LDCLRLX : BaseLDOP<0b11, 0, 1, 0b001, "clr", "l", "", + GPR64>; + + def LDCLRALB : BaseLDOP<0b00, 1, 1, 0b001, "clr", "al", "b", + GPR32>; + def LDCLRALH : BaseLDOP<0b01, 1, 1, 0b001, "clr", "al", "h", + GPR32>; + def LDCLRALW : BaseLDOP<0b10, 1, 1, 0b001, "clr", "al", "", + GPR32>; + def LDCLRALX : BaseLDOP<0b11, 1, 1, 0b001, "clr", "al", "", + GPR64>; + + def LDEORB : BaseLDOP<0b00, 0, 0, 0b010, "eor", "", "b", + GPR32>; + def LDEORH : BaseLDOP<0b01, 0, 0, 0b010, "eor", "", "h", + GPR32>; + def LDEORW : BaseLDOP<0b10, 0, 0, 0b010, "eor", "", "", + GPR32>; + def LDEORX : 
BaseLDOP<0b11, 0, 0, 0b010, "eor", "", "", + GPR64>; + + def LDEORAB : BaseLDOP<0b00, 1, 0, 0b010, "eor", "a", "b", + GPR32>; + def LDEORAH : BaseLDOP<0b01, 1, 0, 0b010, "eor", "a", "h", + GPR32>; + def LDEORAW : BaseLDOP<0b10, 1, 0, 0b010, "eor", "a", "", + GPR32>; + def LDEORAX : BaseLDOP<0b11, 1, 0, 0b010, "eor", "a", "", + GPR64>; + + def LDEORLB : BaseLDOP<0b00, 0, 1, 0b010, "eor", "l", "b", + GPR32>; + def LDEORLH : BaseLDOP<0b01, 0, 1, 0b010, "eor", "l", "h", + GPR32>; + def LDEORLW : BaseLDOP<0b10, 0, 1, 0b010, "eor", "l", "", + GPR32>; + def LDEORLX : BaseLDOP<0b11, 0, 1, 0b010, "eor", "l", "", + GPR64>; + + def LDEORALB : BaseLDOP<0b00, 1, 1, 0b010, "eor", "al", "b", + GPR32>; + def LDEORALH : BaseLDOP<0b01, 1, 1, 0b010, "eor", "al", "h", + GPR32>; + def LDEORALW : BaseLDOP<0b10, 1, 1, 0b010, "eor", "al", "", + GPR32>; + def LDEORALX : BaseLDOP<0b11, 1, 1, 0b010, "eor", "al", "", + GPR64>; + + def LDSETB : BaseLDOP<0b00, 0, 0, 0b011, "set", "", "b", + GPR32>; + def LDSETH : BaseLDOP<0b01, 0, 0, 0b011, "set", "", "h", + GPR32>; + def LDSETW : BaseLDOP<0b10, 0, 0, 0b011, "set", "", "", + GPR32>; + def LDSETX : BaseLDOP<0b11, 0, 0, 0b011, "set", "", "", + GPR64>; + + def LDSETAB : BaseLDOP<0b00, 1, 0, 0b011, "set", "a", "b", + GPR32>; + def LDSETAH : BaseLDOP<0b01, 1, 0, 0b011, "set", "a", "h", + GPR32>; + def LDSETAW : BaseLDOP<0b10, 1, 0, 0b011, "set", "a", "", + GPR32>; + def LDSETAX : BaseLDOP<0b11, 1, 0, 0b011, "set", "a", "", + GPR64>; + + def LDSETLB : BaseLDOP<0b00, 0, 1, 0b011, "set", "l", "b", + GPR32>; + def LDSETLH : BaseLDOP<0b01, 0, 1, 0b011, "set", "l", "h", + GPR32>; + def LDSETLW : BaseLDOP<0b10, 0, 1, 0b011, "set", "l", "", + GPR32>; + def LDSETLX : BaseLDOP<0b11, 0, 1, 0b011, "set", "l", "", + GPR64>; + + def LDSETALB : BaseLDOP<0b00, 1, 1, 0b011, "set", "al", "b", + GPR32>; + def LDSETALH : BaseLDOP<0b01, 1, 1, 0b011, "set", "al", "h", + GPR32>; + def LDSETALW : BaseLDOP<0b10, 1, 1, 0b011, "set", "al", "", + GPR32>; + def LDSETALX : 
BaseLDOP<0b11, 1, 1, 0b011, "set", "al", "", + GPR64>; + + def LDSMAXB : BaseLDOP<0b00, 0, 0, 0b100, "smax", "", "b", + GPR32>; + def LDSMAXH : BaseLDOP<0b01, 0, 0, 0b100, "smax", "", "h", + GPR32>; + def LDSMAXW : BaseLDOP<0b10, 0, 0, 0b100, "smax", "", "", + GPR32>; + def LDSMAXX : BaseLDOP<0b11, 0, 0, 0b100, "smax", "", "", + GPR64>; + + def LDSMAXAB : BaseLDOP<0b00, 1, 0, 0b100, "smax", "a", "b", + GPR32>; + def LDSMAXAH : BaseLDOP<0b01, 1, 0, 0b100, "smax", "a", "h", + GPR32>; + def LDSMAXAW : BaseLDOP<0b10, 1, 0, 0b100, "smax", "a", "", + GPR32>; + def LDSMAXAX : BaseLDOP<0b11, 1, 0, 0b100, "smax", "a", "", + GPR64>; + + def LDSMAXLB : BaseLDOP<0b00, 0, 1, 0b100, "smax", "l", "b", + GPR32>; + def LDSMAXLH : BaseLDOP<0b01, 0, 1, 0b100, "smax", "l", "h", + GPR32>; + def LDSMAXLW : BaseLDOP<0b10, 0, 1, 0b100, "smax", "l", "", + GPR32>; + def LDSMAXLX : BaseLDOP<0b11, 0, 1, 0b100, "smax", "l", "", + GPR64>; + + def LDSMAXALB : BaseLDOP<0b00, 1, 1, 0b100, "smax", "al", "b", + GPR32>; + def LDSMAXALH : BaseLDOP<0b01, 1, 1, 0b100, "smax", "al", "h", + GPR32>; + def LDSMAXALW : BaseLDOP<0b10, 1, 1, 0b100, "smax", "al", "", + GPR32>; + def LDSMAXALX : BaseLDOP<0b11, 1, 1, 0b100, "smax", "al", "", + GPR64>; + + def LDSMINB : BaseLDOP<0b00, 0, 0, 0b101, "smin", "", "b", + GPR32>; + def LDSMINH : BaseLDOP<0b01, 0, 0, 0b101, "smin", "", "h", + GPR32>; + def LDSMINW : BaseLDOP<0b10, 0, 0, 0b101, "smin", "", "", + GPR32>; + def LDSMINX : BaseLDOP<0b11, 0, 0, 0b101, "smin", "", "", + GPR64>; + + def LDSMINAB : BaseLDOP<0b00, 1, 0, 0b101, "smin", "a", "b", + GPR32>; + def LDSMINAH : BaseLDOP<0b01, 1, 0, 0b101, "smin", "a", "h", + GPR32>; + def LDSMINAW : BaseLDOP<0b10, 1, 0, 0b101, "smin", "a", "", + GPR32>; + def LDSMINAX : BaseLDOP<0b11, 1, 0, 0b101, "smin", "a", "", + GPR64>; + + def LDSMINLB : BaseLDOP<0b00, 0, 1, 0b101, "smin", "l", "b", + GPR32>; + def LDSMINLH : BaseLDOP<0b01, 0, 1, 0b101, "smin", "l", "h", + GPR32>; + def LDSMINLW : BaseLDOP<0b10, 0, 1, 0b101, "smin", 
"l", "", + GPR32>; + def LDSMINLX : BaseLDOP<0b11, 0, 1, 0b101, "smin", "l", "", + GPR64>; + + def LDSMINALB : BaseLDOP<0b00, 1, 1, 0b101, "smin", "al", "b", + GPR32>; + def LDSMINALH : BaseLDOP<0b01, 1, 1, 0b101, "smin", "al", "h", + GPR32>; + def LDSMINALW : BaseLDOP<0b10, 1, 1, 0b101, "smin", "al", "", + GPR32>; + def LDSMINALX : BaseLDOP<0b11, 1, 1, 0b101, "smin", "al", "", + GPR64>; + + def LDUMAXB : BaseLDOP<0b00, 0, 0, 0b110, "umax", "", "b", + GPR32>; + def LDUMAXH : BaseLDOP<0b01, 0, 0, 0b110, "umax", "", "h", + GPR32>; + def LDUMAXW : BaseLDOP<0b10, 0, 0, 0b110, "umax", "", "", + GPR32>; + def LDUMAXX : BaseLDOP<0b11, 0, 0, 0b110, "umax", "", "", + GPR64>; + + def LDUMAXAB : BaseLDOP<0b00, 1, 0, 0b110, "umax", "a", "b", + GPR32>; + def LDUMAXAH : BaseLDOP<0b01, 1, 0, 0b110, "umax", "a", "h", + GPR32>; + def LDUMAXAW : BaseLDOP<0b10, 1, 0, 0b110, "umax", "a", "", + GPR32>; + def LDUMAXAX : BaseLDOP<0b11, 1, 0, 0b110, "umax", "a", "", + GPR64>; + + def LDUMAXLB : BaseLDOP<0b00, 0, 1, 0b110, "umax", "l", "b", + GPR32>; + def LDUMAXLH : BaseLDOP<0b01, 0, 1, 0b110, "umax", "l", "h", + GPR32>; + def LDUMAXLW : BaseLDOP<0b10, 0, 1, 0b110, "umax", "l", "", + GPR32>; + def LDUMAXLX : BaseLDOP<0b11, 0, 1, 0b110, "umax", "l", "", + GPR64>; + + def LDUMAXALB : BaseLDOP<0b00, 1, 1, 0b110, "umax", "al", "b", + GPR32>; + def LDUMAXALH : BaseLDOP<0b01, 1, 1, 0b110, "umax", "al", "h", + GPR32>; + def LDUMAXALW : BaseLDOP<0b10, 1, 1, 0b110, "umax", "al", "", + GPR32>; + def LDUMAXALX : BaseLDOP<0b11, 1, 1, 0b110, "umax", "al", "", + GPR64>; + + def LDUMINB : BaseLDOP<0b00, 0, 0, 0b111, "umin", "", "b", + GPR32>; + def LDUMINH : BaseLDOP<0b01, 0, 0, 0b111, "umin", "", "h", + GPR32>; + def LDUMINW : BaseLDOP<0b10, 0, 0, 0b111, "umin", "", "", + GPR32>; + def LDUMINX : BaseLDOP<0b11, 0, 0, 0b111, "umin", "", "", + GPR64>; + + def LDUMINAB : BaseLDOP<0b00, 1, 0, 0b111, "umin", "a", "b", + GPR32>; + def LDUMINAH : BaseLDOP<0b01, 1, 0, 0b111, "umin", "a", "h", + GPR32>; + def 
LDUMINAW : BaseLDOP<0b10, 1, 0, 0b111, "umin", "a", "", + GPR32>; + def LDUMINAX : BaseLDOP<0b11, 1, 0, 0b111, "umin", "a", "", + GPR64>; + + def LDUMINLB : BaseLDOP<0b00, 0, 1, 0b111, "umin", "l", "b", + GPR32>; + def LDUMINLH : BaseLDOP<0b01, 0, 1, 0b111, "umin", "l", "h", + GPR32>; + def LDUMINLW : BaseLDOP<0b10, 0, 1, 0b111, "umin", "l", "", + GPR32>; + def LDUMINLX : BaseLDOP<0b11, 0, 1, 0b111, "umin", "l", "", + GPR64>; + + def LDUMINALB : BaseLDOP<0b00, 1, 1, 0b111, "umin", "al", "b", + GPR32>; + def LDUMINALH : BaseLDOP<0b01, 1, 1, 0b111, "umin", "al", "h", + GPR32>; + def LDUMINALW : BaseLDOP<0b10, 1, 1, 0b111, "umin", "al", "", + GPR32>; + def LDUMINALX : BaseLDOP<0b11, 1, 1, 0b111, "umin", "al", "", + GPR64>; +} + +// v8.1 atomic ST<op>(register), gated on HasLSE: add, clr, eor, set, smax, smin, umax and umin stores in B/H/W/X sizes (B/H/W on GPR32, X on GPR64), each in a plain and an "l"-suffixed form only; every def passes 0b11111 to BaseSTOP (presumably the fixed Rt field; TODO confirm against BaseSTOP). +let AddedComplexity = 5, Predicates = [HasLSE] in { + def STADDB : BaseSTOP<0b00, 0, 0, 0b000, 0b11111, "add", "", "b", + GPR32>; + def STADDH : BaseSTOP<0b01, 0, 0, 0b000, 0b11111, "add", "", "h", + GPR32>; + def STADDW : BaseSTOP<0b10, 0, 0, 0b000, 0b11111, "add", "", "", + GPR32>; + def STADDX : BaseSTOP<0b11, 0, 0, 0b000, 0b11111, "add", "", "", + GPR64>; + + def STADDLB : BaseSTOP<0b00, 0, 1, 0b000, 0b11111, "add", "l", "b", + GPR32>; + def STADDLH : BaseSTOP<0b01, 0, 1, 0b000, 0b11111, "add", "l", "h", + GPR32>; + def STADDLW : BaseSTOP<0b10, 0, 1, 0b000, 0b11111, "add", "l", "", + GPR32>; + def STADDLX : BaseSTOP<0b11, 0, 1, 0b000, 0b11111, "add", "l", "", + GPR64>; + + def STCLRB : BaseSTOP<0b00, 0, 0, 0b001, 0b11111, "clr", "", "b", + GPR32>; + def STCLRH : BaseSTOP<0b01, 0, 0, 0b001, 0b11111, "clr", "", "h", + GPR32>; + def STCLRW : BaseSTOP<0b10, 0, 0, 0b001, 0b11111, "clr", "", "", + GPR32>; + def STCLRX : BaseSTOP<0b11, 0, 0, 0b001, 0b11111, "clr", "", "", + GPR64>; + + def STCLRLB : BaseSTOP<0b00, 0, 1, 0b001, 0b11111, "clr", "l", "b", + GPR32>; + def STCLRLH : BaseSTOP<0b01, 0, 1, 0b001, 0b11111, "clr", "l", "h", + GPR32>; + def STCLRLW : BaseSTOP<0b10, 0, 1, 0b001, 0b11111, "clr", "l", "", + GPR32>; 
+ def STCLRLX : BaseSTOP<0b11, 0, 1, 0b001, 0b11111, "clr", "l", "", + GPR64>; + + def STEORB : BaseSTOP<0b00, 0, 0, 0b010, 0b11111, "eor", "", "b", + GPR32>; + def STEORH : BaseSTOP<0b01, 0, 0, 0b010, 0b11111, "eor", "", "h", + GPR32>; + def STEORW : BaseSTOP<0b10, 0, 0, 0b010, 0b11111, "eor", "", "", + GPR32>; + def STEORX : BaseSTOP<0b11, 0, 0, 0b010, 0b11111, "eor", "", "", + GPR64>; + + def STEORLB : BaseSTOP<0b00, 0, 1, 0b010, 0b11111, "eor", "l", "b", + GPR32>; + def STEORLH : BaseSTOP<0b01, 0, 1, 0b010, 0b11111, "eor", "l", "h", + GPR32>; + def STEORLW : BaseSTOP<0b10, 0, 1, 0b010, 0b11111, "eor", "l", "", + GPR32>; + def STEORLX : BaseSTOP<0b11, 0, 1, 0b010, 0b11111, "eor", "l", "", + GPR64>; + + def STSETB : BaseSTOP<0b00, 0, 0, 0b011, 0b11111, "set", "", "b", + GPR32>; + def STSETH : BaseSTOP<0b01, 0, 0, 0b011, 0b11111, "set", "", "h", + GPR32>; + def STSETW : BaseSTOP<0b10, 0, 0, 0b011, 0b11111, "set", "", "", + GPR32>; + def STSETX : BaseSTOP<0b11, 0, 0, 0b011, 0b11111, "set", "", "", + GPR64>; + + def STSETLB : BaseSTOP<0b00, 0, 1, 0b011, 0b11111, "set", "l", "b", + GPR32>; + def STSETLH : BaseSTOP<0b01, 0, 1, 0b011, 0b11111, "set", "l", "h", + GPR32>; + def STSETLW : BaseSTOP<0b10, 0, 1, 0b011, 0b11111, "set", "l", "", + GPR32>; + def STSETLX : BaseSTOP<0b11, 0, 1, 0b011, 0b11111, "set", "l", "", + GPR64>; + + def STSMAXB : BaseSTOP<0b00, 0, 0, 0b100, 0b11111, "smax", "", "b", + GPR32>; + def STSMAXH : BaseSTOP<0b01, 0, 0, 0b100, 0b11111, "smax", "", "h", + GPR32>; + def STSMAXW : BaseSTOP<0b10, 0, 0, 0b100, 0b11111, "smax", "", "", + GPR32>; + def STSMAXX : BaseSTOP<0b11, 0, 0, 0b100, 0b11111, "smax", "", "", + GPR64>; + + def STSMAXLB : BaseSTOP<0b00, 0, 1, 0b100, 0b11111, "smax", "l", "b", + GPR32>; + def STSMAXLH : BaseSTOP<0b01, 0, 1, 0b100, 0b11111, "smax", "l", "h", + GPR32>; + def STSMAXLW : BaseSTOP<0b10, 0, 1, 0b100, 0b11111, "smax", "l", "", + GPR32>; + def STSMAXLX : BaseSTOP<0b11, 0, 1, 0b100, 0b11111, "smax", "l", "", + GPR64>; + + def 
STSMINB : BaseSTOP<0b00, 0, 0, 0b101, 0b11111, "smin", "", "b", + GPR32>; + def STSMINH : BaseSTOP<0b01, 0, 0, 0b101, 0b11111, "smin", "", "h", + GPR32>; + def STSMINW : BaseSTOP<0b10, 0, 0, 0b101, 0b11111, "smin", "", "", + GPR32>; + def STSMINX : BaseSTOP<0b11, 0, 0, 0b101, 0b11111, "smin", "", "", + GPR64>; + + def STSMINLB : BaseSTOP<0b00, 0, 1, 0b101, 0b11111, "smin", "l", "b", + GPR32>; + def STSMINLH : BaseSTOP<0b01, 0, 1, 0b101, 0b11111, "smin", "l", "h", + GPR32>; + def STSMINLW : BaseSTOP<0b10, 0, 1, 0b101, 0b11111, "smin", "l", "", + GPR32>; + def STSMINLX : BaseSTOP<0b11, 0, 1, 0b101, 0b11111, "smin", "l", "", + GPR64>; + + def STUMAXB : BaseSTOP<0b00, 0, 0, 0b110, 0b11111, "umax", "", "b", + GPR32>; + def STUMAXH : BaseSTOP<0b01, 0, 0, 0b110, 0b11111, "umax", "", "h", + GPR32>; + def STUMAXW : BaseSTOP<0b10, 0, 0, 0b110, 0b11111, "umax", "", "", + GPR32>; + def STUMAXX : BaseSTOP<0b11, 0, 0, 0b110, 0b11111, "umax", "", "", + GPR64>; + + def STUMAXLB : BaseSTOP<0b00, 0, 1, 0b110, 0b11111, "umax", "l", "b", + GPR32>; + def STUMAXLH : BaseSTOP<0b01, 0, 1, 0b110, 0b11111, "umax", "l", "h", + GPR32>; + def STUMAXLW : BaseSTOP<0b10, 0, 1, 0b110, 0b11111, "umax", "l", "", + GPR32>; + def STUMAXLX : BaseSTOP<0b11, 0, 1, 0b110, 0b11111, "umax", "l", "", + GPR64>; + + def STUMINB : BaseSTOP<0b00, 0, 0, 0b111, 0b11111, "umin", "", "b", + GPR32>; + def STUMINH : BaseSTOP<0b01, 0, 0, 0b111, 0b11111, "umin", "", "h", + GPR32>; + def STUMINW : BaseSTOP<0b10, 0, 0, 0b111, 0b11111, "umin", "", "", + GPR32>; + def STUMINX : BaseSTOP<0b11, 0, 0, 0b111, 0b11111, "umin", "", "", + GPR64>; + + def STUMINLB : BaseSTOP<0b00, 0, 1, 0b111, 0b11111, "umin", "l", "b", + GPR32>; + def STUMINLH : BaseSTOP<0b01, 0, 1, 0b111, 0b11111, "umin", "l", "h", + GPR32>; + def STUMINLW : BaseSTOP<0b10, 0, 1, 0b111, 0b11111, "umin", "l", "", + GPR32>; + def STUMINLX : BaseSTOP<0b11, 0, 1, 0b111, 0b11111, "umin", "l", "", + GPR64>; +} 
//===----------------------------------------------------------------------===// // Logical instructions. Index: lib/Target/AArch64/AArch64RegisterInfo.td =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.td +++ lib/Target/AArch64/AArch64RegisterInfo.td @@ -602,17 +602,18 @@ // ARMv8.1a atomic CASP register operands -def WSeqPairs : RegisterTuples<[sube32, subo32], +def WSeqPairs : RegisterTuples<[sube32, subo32], [(rotl GPR32, 0), (rotl GPR32, 1)]>; -def XSeqPairs : RegisterTuples<[sube64, subo64], +def XSeqPairs : RegisterTuples<[sube64, subo64], [(rotl GPR64, 0), (rotl GPR64, 1)]>; -def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, - (add WSeqPairs)>{ +def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32, + (add WSeqPairs)> { let Size = 64; } -def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, - (add XSeqPairs)>{ + +def XSeqPairsClass : RegisterClass<"AArch64", [untyped], 64, + (add XSeqPairs)> { let Size = 128; } Index: lib/Target/AArch64/AArch64SchedThunderX2T99.td =================================================================== --- lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -315,6 +315,36 @@ let NumMicroOps = 3; } +// 8 cycles latency, 4 micro-ops, on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_8Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 8; + let NumMicroOps = 4; +} + +// 12 cycles latency, 6 micro-ops, on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_12Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 12; + let NumMicroOps = 6; +} + +// 16 cycles latency, 8 micro-ops, on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_16Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 16; + let NumMicroOps = 8; +} + +// 24 cycles latency, 12 micro-ops, on LS0 or LS1 and I0, I1, or I2. 
+def THX2T99Write_24Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 24; + let NumMicroOps = 12; +} + +// 32 cycles latency, 16 micro-ops, on LS0 or LS1 and I0, I1, or I2. +def THX2T99Write_32Cyc_I012 : SchedWriteRes<[THX2T99LS01, THX2T99I012]> { + let Latency = 32; + let NumMicroOps = 16; +} + // Define commonly used read types. // No forwarding is provided for these types. @@ -1741,5 +1771,144 @@ def : InstRW<[THX2T99Write_1Cyc_LS01_F01, WriteAdr], (instregex "^ST4i(8|16|32|64)_POST$")>; +// V8.1a Atomics (LSE): CAS, LD<op>, SWP and ST<op> families plus LDLAR/STLLR; each group of opcodes is assigned one THX2T99Write_<N>Cyc_I012 latency class together with WriteAtomic. +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs CASB, CASH, CASW, CASX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs CASAB, CASAH, CASAW, CASAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs CASLB, CASLH, CASLW, CASLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs CASALB, CASALH, CASALW, CASALX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDLARB, LDLARH, LDLARW, LDLARX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDADDB, LDADDH, LDADDW, LDADDX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDADDAB, LDADDAH, LDADDAW, LDADDAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDADDLB, LDADDLH, LDADDLW, LDADDLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDADDALB, LDADDALH, LDADDALW, LDADDALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDEORB, LDEORH, LDEORW, LDEORX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs 
LDEORAB, LDEORAH, LDEORAW, LDEORAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDEORLB, LDEORLH, LDEORLW, LDEORLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDEORALB, LDEORALH, LDEORALW, LDEORALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSETB, LDSETH, LDSETW, LDSETX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDSETAB, LDSETAH, LDSETAW, LDSETAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs LDSETLB, LDSETLH, LDSETLW, LDSETLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs LDSETALB, LDSETALH, LDSETALW, LDSETALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSMAXB, LDSMAXH, LDSMAXW, LDSMAXX, + LDSMAXAB, LDSMAXAH, LDSMAXAW, LDSMAXAX, + LDSMAXLB, LDSMAXLH, LDSMAXLW, LDSMAXLX, + LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDSMINB, LDSMINH, LDSMINW, LDSMINX, + LDSMINAB, LDSMINAH, LDSMINAW, LDSMINAX, + LDSMINLB, LDSMINLH, LDSMINLW, LDSMINLX, + LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDUMAXB, LDUMAXH, LDUMAXW, LDUMAXX, + LDUMAXAB, LDUMAXAH, LDUMAXAW, LDUMAXAX, + LDUMAXLB, LDUMAXLH, LDUMAXLW, LDUMAXLX, + LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs LDUMINB, LDUMINH, LDUMINW, LDUMINX, + LDUMINAB, LDUMINAH, LDUMINAW, LDUMINAX, + LDUMINLB, LDUMINLH, LDUMINLW, LDUMINLX, + LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs SWPB, SWPH, SWPW, SWPX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs SWPAB, SWPAH, SWPAW, SWPAX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs SWPLB, SWPLH, SWPLW, SWPLX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs SWPALB, SWPALH, SWPALW, SWPALX)>; + +def : 
InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs STLLRB, STLLRH, STLLRW, STLLRX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs STADDB, STADDH, STADDW, STADDX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs STADDLB, STADDLH, STADDLW, STADDLX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs STCLRB, STCLRH, STCLRW, STCLRX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs STCLRLB, STCLRLH, STCLRLW, STCLRLX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs STEORB, STEORH, STEORW, STEORX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs STEORLB, STEORLH, STEORLW, STEORLX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs STSETB, STSETH, STSETW, STSETX)>; + +def : InstRW<[THX2T99Write_16Cyc_I012, WriteAtomic], + (instrs STSETLB, STSETLH, STSETLW, STSETLX)>; + +def : InstRW<[THX2T99Write_8Cyc_I012, WriteAtomic], + (instrs STSMAXB, STSMAXH, STSMAXW, STSMAXX, + STSMINB, STSMINH, STSMINW, STSMINX, + STUMAXB, STUMAXH, STUMAXW, STUMAXX, + STUMINB, STUMINH, STUMINW, STUMINX)>; + +def : InstRW<[THX2T99Write_12Cyc_I012, WriteAtomic], + (instrs STSMAXLB, STSMAXLH, STSMAXLW, STSMAXLX, + STSMINLB, STSMINLH, STSMINLW, STSMINLX, + STUMAXLB, STUMAXLH, STUMAXLW, STUMAXLX, + STUMINLB, STUMINLH, STUMINLW, STUMINLX)>; + } // SchedModel = ThunderX2T99Model