diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -294,6 +294,11 @@ return (unsigned) Imm.Val >> 1; } + unsigned getG8pReg() const { + assert(isEvenRegNumber() && "Invalid access!"); + return (unsigned)Imm.Val; + } + unsigned getCCReg() const { assert(isCCRegNumber() && "Invalid access!"); return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal); @@ -432,6 +437,9 @@ && isUInt<5>(getExprCRVal())) || (Kind == Immediate && isUInt<5>(getImm())); } + + bool isEvenRegNumber() const { return isRegNumber() && (getImm() & 1) == 0; } + bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) && isPowerOf2_32(getImm()); } bool isATBitsAsHint() const { return false; } @@ -462,6 +470,11 @@ Inst.addOperand(MCOperand::createReg(XRegsNoX0[getReg()])); } + void addRegG8pRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(XRegs[getG8pReg()])); + } + void addRegGxRCOperands(MCInst &Inst, unsigned N) const { if (isPPC64()) addRegG8RCOperands(Inst, N); diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp --- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -160,6 +160,12 @@ return decodeRegisterClass(Inst, RegNo, XRegs); } +static DecodeStatus DecodeG8pRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, XRegs); +} + static DecodeStatus DecodeG8RC_NOX0RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- 
a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -271,11 +271,22 @@ let mayLoad = 1, hasSideEffects = 0 in { def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr", IIC_LdStLDARX, []>; +// TODO: Add scheduling info. +let hasNoSchedulingInfo = 1 in +def LQARX : XForm_1_memOp<31, 276, (outs g8prc:$RTp), (ins memrr:$ptr), + "lqarx $RTp, $ptr", IIC_LdStLQARX, []>, isPPC64; // Instruction to support lock versions of atomics // (EH=1 - see Power ISA 2.07 Book II 4.4.2) def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr), "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isRecordForm; +// TODO: Add scheduling info. +let hasNoSchedulingInfo = 1 in +// FIXME: Find a way to drop isRecordForm, since LQARXL does not +// actually alter CR0. +def LQARXL : XForm_1<31, 276, (outs g8prc:$RTp), (ins memrr:$ptr), + "lqarx $RTp, $ptr, 1", IIC_LdStLQARX, []>, + isPPC64, isRecordForm; let hasExtraDefRegAllocReq = 1 in def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC), @@ -283,9 +294,15 @@ Requires<[IsISA3_0]>; } -let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in +let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in { def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$rS, memrr:$dst), "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isRecordForm; +// TODO: Add scheduling info. +let hasNoSchedulingInfo = 1 in +def STQCX : XForm_1_memOp<31, 182, (outs), (ins g8prc:$RSp, memrr:$dst), + "stqcx. $RSp, $dst", IIC_LdStSTQCX, []>, + isPPC64, isRecordForm; +} let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC), @@ -1228,6 +1245,22 @@ "ldmx $rD, $src", IIC_LdStLD, []>, isPPC64, Requires<[IsISA3_0]>; } + +let mayLoad = 1, hasNoSchedulingInfo = 1 in { +// Full 16-byte load. +// Early-clobber $RTp so it is never assigned the same register as RA. +// TODO: Add scheduling info.
+def LQ : DQForm_RTp5_RA17_MEM<56, 0, + (outs g8prc:$RTp), + (ins memrix16:$src), + "lq $RTp, $src", IIC_LdStLQ, + []>, + RegConstraint<"@earlyclobber $RTp">, + isPPC64; +def RESTORE_QUADWORD : PPCEmitTimePseudo<(outs g8prc:$RTp), (ins memrix:$src), + "#RESTORE_QUADWORD", []>; +} + } // Support for medium and large code model. @@ -1411,6 +1444,17 @@ "stdbrx $rS, $dst", IIC_LdStStore, [(PPCstbrx i64:$rS, ForceXForm:$dst, i64)]>, isPPC64, PPC970_DGroup_Cracked; + +let mayStore = 1, hasNoSchedulingInfo = 1 in { +// Normal 16-byte stores. +// TODO: Add scheduling info. +def STQ : DSForm_1<62, 2, (outs), (ins g8prc:$RSp, memrix:$dst), + "stq $RSp, $dst", IIC_LdStSTQ, + []>, isPPC64; +def SPILL_QUADWORD : PPCEmitTimePseudo<(outs), (ins g8prc:$RSp, memrix:$dst), + "#SPILL_QUADWORD", []>; +} + } // Stores with Update (pre-inc). diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td --- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td +++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td @@ -422,6 +422,20 @@ let Inst{29-31} = xo; } +class DQForm_RTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, + string asmstr, InstrItinClass itin, + list<dag> pattern> + : I<opcode, OOL, IOL, asmstr, itin> { + bits<5> RTp; + bits<17> DQ_RA; + let Pattern = pattern; + + let Inst{6-10} = RTp{4-0}; + let Inst{11-15} = DQ_RA{16-12}; // Base register number. + let Inst{16-27} = DQ_RA{11-0}; // Displacement. + let Inst{28-31} = xo; +} + // 1.7.6 X-Form class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern> diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -127,6 +127,7 @@ SOK_AccumulatorSpill, SOK_UAccumulatorSpill, SOK_SPESpill, + SOK_PairedG8Spill, SOK_LastOpcodeSpill // This must be last on the enum.
}; @@ -136,14 +137,16 @@ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXVD2X, PPC::LXSDX, PPC::LXSSPX, \ - PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD \ + PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, PPC::EVLDD, \ + PPC::RESTORE_QUADWORD \ } #define Pwr9LoadOpcodes \ { \ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ - PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, NoInstr \ + PPC::DFLOADf32, PPC::SPILLTOVSR_LD, NoInstr, NoInstr, NoInstr, \ + NoInstr, PPC::RESTORE_QUADWORD \ } #define Pwr10LoadOpcodes \ @@ -151,21 +154,23 @@ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \ PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \ - PPC::RESTORE_UACC, NoInstr \ + PPC::RESTORE_UACC, NoInstr, PPC::RESTORE_QUADWORD \ } #define Pwr8StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXVD2X, PPC::STXSDX, PPC::STXSSPX, \ - PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD \ + PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, PPC::EVSTDD, \ + PPC::SPILL_QUADWORD \ } #define Pwr9StoreOpcodes \ { \ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ - PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr \ + PPC::SPILLTOVSR_ST, NoInstr, NoInstr, NoInstr, NoInstr, \ + PPC::SPILL_QUADWORD \ } #define Pwr10StoreOpcodes \ @@ -173,7 +178,7 @@ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \ PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \ - NoInstr \ + NoInstr, PPC::SPILL_QUADWORD \ } // Initialize arrays for load and store spill opcodes on supported subtargets. 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1834,6 +1834,22 @@ if (SrcPrimed && !KillSrc) BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg); return; + } else if (PPC::G8pRCRegClass.contains(DestReg) && + PPC::G8pRCRegClass.contains(SrcReg)) { + // TODO: Handle G8RC to G8pRC (and vice versa) copy. + unsigned DestRegIdx = DestReg - PPC::G8p0; + MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx; + MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1; + unsigned SrcRegIdx = SrcReg - PPC::G8p0; + MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx; + MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1; + BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0) + .addReg(SrcRegSub0) + .addReg(SrcRegSub0, getKillRegState(KillSrc)); + BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1) + .addReg(SrcRegSub1) + .addReg(SrcRegSub1, getKillRegState(KillSrc)); + return; } else llvm_unreachable("Impossible reg-to-reg copy"); @@ -1886,6 +1902,8 @@ assert(Subtarget.pairedVectorMemops() && "Register unexpected when paired memops are disabled."); OpcodeIndex = SOK_PairedVecSpill; + } else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) { + OpcodeIndex = SOK_PairedG8Spill; } else { llvm_unreachable("Unknown regclass!"); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -635,6 +635,12 @@ def g8rc : RegisterOperand<G8RC> { let ParserMatchClass = PPCRegG8RCAsmOperand; } +def PPCRegG8pRCAsmOperand : AsmOperandClass { + let Name = "RegG8pRC"; let PredicateMethod = "isEvenRegNumber"; +} +def g8prc : RegisterOperand<G8pRC> { + let ParserMatchClass = PPCRegG8pRCAsmOperand; +} def PPCRegGPRCNoR0AsmOperand : AsmOperandClass { let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber"; } diff --git
a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h @@ -123,6 +123,11 @@ void lowerACCRestore(MachineBasicBlock::iterator II, unsigned FrameIndex) const; + void lowerQuadwordSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + void lowerQuadwordRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const; + static void emitAccCopyInfo(MachineBasicBlock &MBB, MCRegister DestReg, MCRegister SrcReg); diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1146,6 +1146,59 @@ MBB.erase(II); } +/// lowerQuadwordSpilling - Spill a paired GPR with two STDs (endian-aware). +void PPCRegisterInfo::lowerQuadwordSpilling(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + Register SrcReg = MI.getOperand(0).getReg(); + bool IsKilled = MI.getOperand(0).isKill(); + + Register Reg = PPC::X0 + (SrcReg - PPC::G8p0) * 2; + bool IsLittleEndian = Subtarget.isLittleEndian(); + + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STD)) + .addReg(Reg, getKillRegState(IsKilled)), + FrameIndex, IsLittleEndian ? 8 : 0); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STD)) + .addReg(Reg + 1, getKillRegState(IsKilled)), + FrameIndex, IsLittleEndian ? 0 : 8); + + // Discard the pseudo instruction now that both doubleword stores exist. + MBB.erase(II); +} + +/// lowerQuadwordRestore - Reload a paired GPR with two LDs (endian-aware).
+void PPCRegisterInfo::lowerQuadwordRestore(MachineBasicBlock::iterator II, + unsigned FrameIndex) const { + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + Register DestReg = MI.getOperand(0).getReg(); + assert(MI.definesRegister(DestReg) && + "RESTORE_QUADWORD does not define its destination"); + + Register Reg = PPC::X0 + (DestReg - PPC::G8p0) * 2; + bool IsLittleEndian = Subtarget.isLittleEndian(); + + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LD), Reg), FrameIndex, + IsLittleEndian ? 8 : 0); + addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::LD), Reg + 1), FrameIndex, + IsLittleEndian ? 0 : 8); + + // Discard the pseudo instruction now that both doubleword loads exist. + MBB.erase(II); +} + bool PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF, Register Reg, int &FrameIdx) const { // For the nonvolatile condition registers (CR2, CR3, CR4) return true to @@ -1182,12 +1235,14 @@ case PPC::LXSSP: case PPC::STXSD: case PPC::STXSSP: + case PPC::STQ: return 4; case PPC::EVLDD: case PPC::EVSTDD: return 8; case PPC::LXV: case PPC::STXV: + case PPC::LQ: return 16; } } @@ -1283,6 +1338,12 @@ } else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) { lowerACCRestore(II, FrameIndex); return; + } else if (OpC == PPC::SPILL_QUADWORD) { + lowerQuadwordSpilling(II, FrameIndex); + return; + } else if (OpC == PPC::RESTORE_QUADWORD) { + lowerQuadwordRestore(II, FrameIndex); + return; } // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td --- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td @@ -20,6 +20,8 @@ def sub_vsx1 : SubRegIndex<128, 128>; def sub_pair0 : SubRegIndex<256>; def sub_pair1 : SubRegIndex<256, 256>; +def sub_gp8_x0 : SubRegIndex<64>; +def sub_gp8_x1 : SubRegIndex<64, 64>; } @@ -119,6 +121,15 @@ let SubRegs = subregs; } +// GP8Pair - Consecutive even-odd paired GP8. +class GP8Pair<string n, bits<5> EvenIndex> : PPCReg<n> { + assert !eq(EvenIndex{0}, 0), "Index should be even."; + let HWEncoding{4-0} = EvenIndex; + let SubRegs = [!cast<GP8>("X"#EvenIndex), !cast<GP8>("X"#!add(EvenIndex, 1))]; + let DwarfNumbers = [-1, -1]; + let SubRegIndices = [sub_gp8_x0, sub_gp8_x1]; +} + // General-purpose registers foreach Index = 0-31 in { def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>; @@ -185,6 +196,11 @@ } } +// 16 paired even-odd consecutive GP8s. +foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in { + def G8p#!srl(Index, 1) : GP8Pair<"r"#Index, Index>; +} + // The representation of r0 when treated as the constant 0. def ZERO : GPR<0, "0">, DwarfRegAlias<R0>; def ZERO8 : GP8<ZERO, "0">, DwarfRegAlias<X0>; @@ -462,3 +478,18 @@ VSRp27, VSRp26)> { let Size = 256; } + +// Keep the allocation order as close to G8RC's as possible to avoid potential +// spilling. Likewise, provide an AltOrder for the 64-bit ELF ABI in which r2 +// is allocated last.
+def G8pRC : + RegisterClass<"PPC", [i128], 128, + (add (sequence "G8p%u", 1, 5), + (sequence "G8p%u", 14, 7), + G8p15, G8p6, G8p0)> { + let AltOrders = [(add (sub G8pRC, G8p1), G8p1)]; + let AltOrderSelect = [{ + return MF.getSubtarget<PPCSubtarget>().is64BitELFABI(); + }]; + let Size = 128; +} diff --git a/llvm/lib/Target/PowerPC/PPCSchedule.td b/llvm/lib/Target/PowerPC/PPCSchedule.td --- a/llvm/lib/Target/PowerPC/PPCSchedule.td +++ b/llvm/lib/Target/PowerPC/PPCSchedule.td @@ -54,6 +54,8 @@ def IIC_LdStLHAU : InstrItinClass; def IIC_LdStLHAUX : InstrItinClass; def IIC_LdStLMW : InstrItinClass; +def IIC_LdStLQ : InstrItinClass; +def IIC_LdStLQARX : InstrItinClass; def IIC_LdStLVecX : InstrItinClass; def IIC_LdStLWA : InstrItinClass; def IIC_LdStLWARX : InstrItinClass; @@ -61,6 +63,8 @@ def IIC_LdStSLBIE : InstrItinClass; def IIC_LdStSTD : InstrItinClass; def IIC_LdStSTDCX : InstrItinClass; +def IIC_LdStSTQ : InstrItinClass; +def IIC_LdStSTQCX : InstrItinClass; def IIC_LdStSTU : InstrItinClass; def IIC_LdStSTUX : InstrItinClass; def IIC_LdStSTFD : InstrItinClass; diff --git a/llvm/test/CodeGen/PowerPC/ldst-16-byte-asm.mir b/llvm/test/CodeGen/PowerPC/ldst-16-byte-asm.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ldst-16-byte-asm.mir @@ -0,0 +1,23 @@ +# RUN: llc -simplify-mir -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +# RUN: %s -o - | FileCheck %s + +--- +name: foo +alignment: 8 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x30, $x31 + ; CHECK-LABEL: .foo + ; CHECK: lq 2, 128(4) + ; CHECK: lqarx 28, 30, 31 + ; CHECK: stqcx. 28, 30, 31 + ; CHECK: stq 2, 128(4) + $g8p1 = LQ 128, $x4 + $g8p14 = LQARX $x30, $x31 + STQCX $g8p14, $x30, $x31, implicit-def $cr0 + STQ $g8p1, 128, $x4 + $x3 = COPY $x31 + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +...
+ diff --git a/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir b/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir @@ -0,0 +1,221 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -simplify-mir -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +# RUN: -stop-after=postrapseudos %s -o - | FileCheck %s + +--- +name: foo +alignment: 8 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4 + ; CHECK-LABEL: name: foo + ; CHECK: liveins: $x3, $x4 + ; CHECK: early-clobber renamable $g8p3 = LQ 128, $x4 + ; CHECK: $x3 = OR8 $x7, $x7 + ; CHECK: STQ killed renamable $g8p3, 160, $x3 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + %0:g8prc = LQ 128, $x4 + $x3 = COPY %0.sub_gp8_x1:g8prc + STQ %0, 160, $x3 + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +... + +--- +name: foobar +alignment: 8 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4 + ; CHECK-LABEL: name: foobar + ; CHECK: liveins: $x3, $x4 + ; CHECK: renamable $g8p3 = LQARX $x3, $x4 + ; CHECK: STQCX renamable $g8p3, $x3, $x4, implicit-def dead $cr0 + ; CHECK: $x3 = OR8 $x7, $x7, implicit killed $g8p3 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + %0:g8prc = LQARX $x3, $x4 + STQCX %0:g8prc, $x3, $x4, implicit-def $cr0 + $x3 = COPY %0.sub_gp8_x1:g8prc + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +... 
+ +--- +name: bar +alignment: 8 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4 + ; CHECK-LABEL: name: bar + ; CHECK: liveins: $x3, $x4 + ; CHECK: early-clobber renamable $g8p2 = LQ 128, renamable $x3 + ; CHECK: STQ renamable $g8p2, 160, $x3 + ; CHECK: $x3 = OR8 $x4, $x4, implicit killed $g8p2 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + %0:g8rc_nox0 = COPY $x3 + %1:g8prc = LQ 128, %0 + STQ %1, 160, $x3 + $x3 = COPY %1.sub_gp8_x0:g8prc + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +... + +# `spill_g8prc`'s code contains undefined behaviors. This code is only to +# demonstrate correctness of spiller. +--- +name: spill_g8prc +alignment: 8 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12 + ; CHECK-LABEL: name: spill_g8prc + ; CHECK: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31 + ; CHECK: STD killed $x14, -144, $x1 :: (store 8 into %fixed-stack.17, align 16) + ; CHECK: STD killed $x15, -136, $x1 :: (store 8 into %fixed-stack.16) + ; CHECK: STD killed $x16, -128, $x1 :: (store 8 into %fixed-stack.15, align 16) + ; CHECK: STD killed $x17, -120, $x1 :: (store 8 into %fixed-stack.14) + ; CHECK: STD killed $x18, -112, $x1 :: (store 8 into %fixed-stack.13, align 16) + ; CHECK: STD killed $x19, -104, $x1 :: (store 8 into %fixed-stack.12) + ; CHECK: STD killed $x20, -96, $x1 :: (store 8 into %fixed-stack.11, align 16) + ; CHECK: STD killed $x21, -88, $x1 :: (store 8 into %fixed-stack.10) + ; CHECK: STD killed $x22, -80, $x1 :: (store 8 into %fixed-stack.9, align 16) + ; CHECK: STD killed $x23, -72, $x1 :: (store 8 into %fixed-stack.8) + ; CHECK: STD killed $x24, -64, $x1 :: (store 8 into %fixed-stack.7, align 16) + ; CHECK: STD killed $x25, -56, $x1 :: (store 8 into %fixed-stack.6) + ; CHECK: STD killed $x26, -48, $x1 :: (store 8 into 
%fixed-stack.5, align 16) + ; CHECK: STD killed $x27, -40, $x1 :: (store 8 into %fixed-stack.4) + ; CHECK: STD killed $x28, -32, $x1 :: (store 8 into %fixed-stack.3, align 16) + ; CHECK: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.2) + ; CHECK: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.1, align 16) + ; CHECK: STD killed $x31, -8, $x1 :: (store 8 into %fixed-stack.0) + ; CHECK: $x7 = OR8 $x3, $x3 + ; CHECK: renamable $g8p4 = LQARX $x5, $x6 + ; CHECK: STD killed $x8, -160, $x1 + ; CHECK: STD killed $x9, -152, $x1 + ; CHECK: renamable $g8p5 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p4 = LQARX $x3, renamable $x4 + ; CHECK: STD killed $x8, -176, $x1 + ; CHECK: STD killed $x9, -168, $x1 + ; CHECK: renamable $g8p4 = LQARX $x3, renamable $x4 + ; CHECK: STD killed $x8, -192, $x1 + ; CHECK: STD killed $x9, -184, $x1 + ; CHECK: renamable $g8p4 = LQARX $x3, renamable $x4 + ; CHECK: STD killed $x8, -208, $x1 + ; CHECK: STD killed $x9, -200, $x1 + ; CHECK: renamable $g8p4 = LQARX $x3, renamable $x4 + ; CHECK: STD killed $x8, -224, $x1 + ; CHECK: STD killed $x9, -216, $x1 + ; CHECK: renamable $g8p10 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p9 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p8 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p7 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p15 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p11 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p12 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p13 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p14 = LQARX $x3, renamable $x4 + ; CHECK: renamable $g8p4 = LQARX $x3, renamable $x4 + ; CHECK: $x3 = OR8 $x11, $x11 + ; CHECK: STQCX killed renamable $g8p4, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p14, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p13, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed 
renamable $g8p12, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p11, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p15, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p7, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p8, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p9, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p10, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: $x8 = LD -224, $x1 + ; CHECK: $x9 = LD -216, $x1 + ; CHECK: STQCX killed renamable $g8p4, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: $x8 = LD -208, $x1 + ; CHECK: $x9 = LD -200, $x1 + ; CHECK: STQCX killed renamable $g8p4, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: $x8 = LD -192, $x1 + ; CHECK: $x9 = LD -184, $x1 + ; CHECK: STQCX killed renamable $g8p4, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: $x8 = LD -176, $x1 + ; CHECK: $x9 = LD -168, $x1 + ; CHECK: STQCX killed renamable $g8p4, renamable $x7, renamable $x4, implicit-def dead $cr0 + ; CHECK: STQCX killed renamable $g8p5, killed renamable $x7, killed renamable $x4, implicit-def dead $cr0 + ; CHECK: $x8 = LD -160, $x1 + ; CHECK: $x9 = LD -152, $x1 + ; CHECK: STQCX killed renamable $g8p4, $x5, $x6, implicit-def dead $cr0 + ; CHECK: $x31 = LD -8, $x1 :: (load 8 from %fixed-stack.0) + ; CHECK: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.1, align 16) + ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.2) + ; CHECK: $x28 = LD -32, $x1 :: (load 8 from %fixed-stack.3, align 16) + ; CHECK: $x27 = LD -40, $x1 :: (load 8 from %fixed-stack.4) + ; CHECK: $x26 = LD -48, $x1 :: (load 8 from %fixed-stack.5, align 16) + ; CHECK: $x25 = LD -56, $x1 :: (load 8 from %fixed-stack.6) + ; CHECK: $x24 = LD -64, $x1 :: (load 8 
from %fixed-stack.7, align 16) + ; CHECK: $x23 = LD -72, $x1 :: (load 8 from %fixed-stack.8) + ; CHECK: $x22 = LD -80, $x1 :: (load 8 from %fixed-stack.9, align 16) + ; CHECK: $x21 = LD -88, $x1 :: (load 8 from %fixed-stack.10) + ; CHECK: $x20 = LD -96, $x1 :: (load 8 from %fixed-stack.11, align 16) + ; CHECK: $x19 = LD -104, $x1 :: (load 8 from %fixed-stack.12) + ; CHECK: $x18 = LD -112, $x1 :: (load 8 from %fixed-stack.13, align 16) + ; CHECK: $x17 = LD -120, $x1 :: (load 8 from %fixed-stack.14) + ; CHECK: $x16 = LD -128, $x1 :: (load 8 from %fixed-stack.15, align 16) + ; CHECK: $x15 = LD -136, $x1 :: (load 8 from %fixed-stack.16) + ; CHECK: $x14 = LD -144, $x1 :: (load 8 from %fixed-stack.17, align 16) + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + %addr0:g8rc_nox0 = COPY $x3 + %addr1:g8rc = COPY $x4 + %0:g8prc = LQARX $x5, $x6 + %1:g8prc = LQARX %addr0, %addr1 + %2:g8prc = LQARX %addr0, %addr1 + %3:g8prc = LQARX %addr0, %addr1 + %4:g8prc = LQARX %addr0, %addr1 + %5:g8prc = LQARX %addr0, %addr1 + %6:g8prc = LQARX %addr0, %addr1 + %7:g8prc = LQARX %addr0, %addr1 + %8:g8prc = LQARX %addr0, %addr1 + %9:g8prc = LQARX %addr0, %addr1 + %10:g8prc = LQARX %addr0, %addr1 + %11:g8prc = LQARX %addr0, %addr1 + %12:g8prc = LQARX %addr0, %addr1 + %13:g8prc = LQARX %addr0, %addr1 + %14:g8prc = LQARX %addr0, %addr1 + %15:g8prc = LQARX %addr0, %addr1 + STQCX %15:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %14:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %13:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %12:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %11:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %10:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %9:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %8:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %7:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %6:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %5:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %4:g8prc, %addr0, %addr1, 
implicit-def $cr0 + STQCX %3:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %2:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %1:g8prc, %addr0, %addr1, implicit-def $cr0 + STQCX %0:g8prc, $x5, $x6, implicit-def $cr0 + $x3 = COPY %1.sub_gp8_x1:g8prc + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +... + +--- +name: copy_g8prc +alignment: 8 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $g8p8 + ; CHECK-LABEL: name: copy_g8prc + ; CHECK: liveins: $g8p8 + ; CHECK: $x4 = OR8 $x16, $x16 + ; CHECK: $x5 = OR8 $x17, $x17 + ; CHECK: $x3 = OR8 $x5, $x5 + ; CHECK: $x4 = KILL renamable $x4, implicit killed $g8p2 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit killed $x3, implicit $x4 + %0:g8prc = COPY $g8p8 + $x3 = COPY %0.sub_gp8_x1:g8prc + $x4 = COPY %0.sub_gp8_x0:g8prc + BLR8 implicit $lr8, implicit undef $rm, implicit $x3, implicit $x4 +... diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt --- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-bookII.txt @@ -33,6 +33,12 @@ # CHECK: stdcx. 2, 3, 4 0x7c 0x43 0x21 0xad +# CHECK: stqcx. 2, 3, 4 +0x7c 0x43 0x21 0x6d + +# CHECK: stqcx. 
28, 30, 31 +0x7f 0x9e 0xf9 0x6d + # CHECK: stwat 2, 3, 28 0x7c 0x43 0xe5 0x8c @@ -72,6 +78,12 @@ # CHECK: ldarx 2, 3, 4 0x7c 0x43 0x20 0xa8 +# CHECK: lqarx 2, 3, 4 +0x7c 0x43 0x22 0x28 + +# CHECK: lqarx 28, 30, 31 +0x7f 0x9e 0xfa 0x28 + # CHECK: lbarx 2, 3, 4, 1 0x7c 0x43 0x20 0x69 @@ -84,6 +96,12 @@ # CHECK: ldarx 2, 3, 4, 1 0x7c 0x43 0x20 0xa9 +# CHECK: lqarx 2, 3, 4, 1 +0x7c 0x43 0x22 0x29 + +# CHECK: lqarx 28, 30, 31, 1 +0x7f 0x9e 0xfa 0x29 + # CHECK: lwat 2, 3, 28 0x7c 0x43 0xe4 0x8c diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt --- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt +++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding.txt @@ -154,6 +154,12 @@ # CHECK: ldmx 2, 3, 4 0x7c 0x43 0x22 0x6a +# CHECK: lq 2, 128(4) +0xe0 0x44 0x00 0x80 + +# CHECK: lq 28, 128(30) +0xe3 0x9e 0x00 0x80 + # CHECK: stb 2, 128(4) 0x98 0x44 0x00 0x80 @@ -202,6 +208,12 @@ # CHECK: stdux 2, 3, 4 0x7c 0x43 0x21 0x6a +# CHECK: stq 2, 128(4) +0xf8 0x44 0x00 0x82 + +# CHECK: stq 28, 128(30) +0xfb 0x9e 0x00 0x82 + # CHECK: lhbrx 2, 3, 4 0x7c 0x43 0x26 0x2c diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-bookII.s b/llvm/test/MC/PowerPC/ppc64-encoding-bookII.s --- a/llvm/test/MC/PowerPC/ppc64-encoding-bookII.s +++ b/llvm/test/MC/PowerPC/ppc64-encoding-bookII.s @@ -83,6 +83,14 @@ # CHECK-LE: stdcx. 2, 3, 4 # encoding: [0xad,0x21,0x43,0x7c] stdcx. 2, 3, 4 +# CHECK-BE: stqcx. 2, 3, 4 # encoding: [0x7c,0x43,0x21,0x6d] +# CHECK-LE: stqcx. 2, 3, 4 # encoding: [0x6d,0x21,0x43,0x7c] + stqcx. 2, 3, 4 + +# CHECK-BE: stqcx. 28, 30, 31 # encoding: [0x7f,0x9e,0xf9,0x6d] +# CHECK-LE: stqcx. 28, 30, 31 # encoding: [0x6d,0xf9,0x9e,0x7f] + stqcx. 
28, 30, 31 + # CHECK-BE: stwat 2, 3, 28 # encoding: [0x7c,0x43,0xe5,0x8c] # CHECK-LE: stwat 2, 3, 28 # encoding: [0x8c,0xe5,0x43,0x7c] stwat 2, 3, 28 @@ -134,6 +142,14 @@ # CHECK-LE: ldarx 2, 3, 4 # encoding: [0xa8,0x20,0x43,0x7c] ldarx 2, 3, 4 +# CHECK-BE: lqarx 2, 3, 4 # encoding: [0x7c,0x43,0x22,0x28] +# CHECK-LE: lqarx 2, 3, 4 # encoding: [0x28,0x22,0x43,0x7c] + lqarx 2, 3, 4 + +# CHECK-BE: lqarx 28, 30, 31 # encoding: [0x7f,0x9e,0xfa,0x28] +# CHECK-LE: lqarx 28, 30, 31 # encoding: [0x28,0xfa,0x9e,0x7f] + lqarx 28, 30, 31 + # CHECK-BE: lbarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x20,0x69] # CHECK-LE: lbarx 2, 3, 4, 1 # encoding: [0x69,0x20,0x43,0x7c] lbarx 2, 3, 4, 1 @@ -150,6 +166,14 @@ # CHECK-LE: ldarx 2, 3, 4, 1 # encoding: [0xa9,0x20,0x43,0x7c] ldarx 2, 3, 4, 1 +# CHECK-BE: lqarx 2, 3, 4, 1 # encoding: [0x7c,0x43,0x22,0x29] +# CHECK-LE: lqarx 2, 3, 4, 1 # encoding: [0x29,0x22,0x43,0x7c] + lqarx 2, 3, 4, 1 + +# CHECK-BE: lqarx 28, 30, 31, 1 # encoding: [0x7f,0x9e,0xfa,0x29] +# CHECK-LE: lqarx 28, 30, 31, 1 # encoding: [0x29,0xfa,0x9e,0x7f] + lqarx 28, 30, 31, 1 + # CHECK-BE: lwat 2, 3, 28 # encoding: [0x7c,0x43,0xe4,0x8c] # CHECK-LE: lwat 2, 3, 28 # encoding: [0x8c,0xe4,0x43,0x7c] lwat 2, 3, 28 diff --git a/llvm/test/MC/PowerPC/ppc64-encoding.s b/llvm/test/MC/PowerPC/ppc64-encoding.s --- a/llvm/test/MC/PowerPC/ppc64-encoding.s +++ b/llvm/test/MC/PowerPC/ppc64-encoding.s @@ -241,6 +241,12 @@ # CHECK-BE: ldmx 2, 3, 4 # encoding: [0x7c,0x43,0x22,0x6a] # CHECK-LE: ldmx 2, 3, 4 # encoding: [0x6a,0x22,0x43,0x7c] ldmx 2, 3, 4 +# CHECK-BE: lq 2, 128(4) # encoding: [0xe0,0x44,0x00,0x80] +# CHECK-LE: lq 2, 128(4) # encoding: [0x80,0x00,0x44,0xe0] + lq 2, 128(4) +# CHECK-BE: lq 28, 128(30) # encoding: [0xe3,0x9e,0x00,0x80] +# CHECK-LE: lq 28, 128(30) # encoding: [0x80,0x00,0x9e,0xe3] + lq 28, 128(30) # Fixed-point store instructions @@ -292,6 +298,12 @@ # CHECK-BE: stdux 2, 3, 4 # encoding: [0x7c,0x43,0x21,0x6a] # CHECK-LE: stdux 2, 3, 4 # encoding: [0x6a,0x21,0x43,0x7c] 
stdux 2, 3, 4 +# CHECK-BE: stq 2, 128(4) # encoding: [0xf8,0x44,0x00,0x82] +# CHECK-LE: stq 2, 128(4) # encoding: [0x82,0x00,0x44,0xf8] + stq 2, 128(4) +# CHECK-BE: stq 28, 128(30) # encoding: [0xfb,0x9e,0x00,0x82] +# CHECK-LE: stq 28, 128(30) # encoding: [0x82,0x00,0x9e,0xfb] + stq 28, 128(30) # Fixed-point load and store with byte reversal instructions