Index: lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp =================================================================== --- lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -83,6 +83,16 @@ PPC::F24, PPC::F25, PPC::F26, PPC::F27, PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const MCPhysReg VFRegs[32] = { + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; static const MCPhysReg VRegs[32] = { PPC::V0, PPC::V1, PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, @@ -590,6 +600,11 @@ Inst.addOperand(MCOperand::createReg(FRegs[getReg()])); } + void addRegVFRCOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createReg(VFRegs[getReg()])); + } + void addRegVRRCOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::createReg(VRegs[getReg()])); Index: lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp =================================================================== --- lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -89,6 +89,17 @@ PPC::F28, PPC::F29, PPC::F30, PPC::F31 }; +static const unsigned VFRegs[] = { + PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3, + PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7, + PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11, + PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15, + PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19, + PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23, + PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27, + PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31 +}; + static const unsigned VRegs[] = { PPC::V0, PPC::V1, 
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, @@ -242,6 +253,12 @@ return decodeRegisterClass(Inst, RegNo, FRegs); } +static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { + return decodeRegisterClass(Inst, RegNo, VFRegs); +} + static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address, const void *Decoder) { Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -10,8 +10,8 @@ // This class prints an PPC MCInst to a .s file. // //===----------------------------------------------------------------------===// - #include "PPCInstPrinter.h" +#include "PPCInstrInfo.h" #include "MCTargetDesc/PPCMCTargetDesc.h" #include "MCTargetDesc/PPCPredicates.h" #include "llvm/MC/MCExpr.h" @@ -429,11 +429,28 @@ return RegName; } +static bool isVFRegister(unsigned Reg) { + return Reg >= PPC::VF0 && Reg <= PPC::VF31; +} + void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { - const char *RegName = getRegisterName(Op.getReg()); + unsigned Reg = Op.getReg(); + + // If this is a VSX instruction that uses a 64-bit Altivec register (we use + // VFRC to represent 64-bit Altivec registers), we need to upgrade VFRC to + // VRRC (the full 128-bit Altivec register). + // (Please keep this in sync with PPCAsmPrinter::printOperand.) + if (isVFRegister(Reg) && + MII.get(MI->getOpcode()).TSFlags & PPCII::VsxUseAltivecReg) { + const MCRegisterClass *VRRC = &MRI.getRegClass(PPC::VRRCRegClassID); + Reg = MRI.getMatchingSuperReg(Reg, PPC::sub_64, VRRC); + assert(Reg && "VFRC's super register should include VRRC"); + } + + const char *RegName = getRegisterName(Reg); // The linux and AIX assembler does not take register prefixes. 
if (!isDarwinSyntax()) RegName = stripRegisterPrefix(RegName); Index: lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- lib/Target/PowerPC/PPCAsmPrinter.cpp +++ lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -164,6 +164,10 @@ return RegName; } +static bool isVFRegister(unsigned Reg) { + return Reg >= PPC::VF0 && Reg <= PPC::VF31; +} + void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { const DataLayout &DL = getDataLayout(); @@ -171,7 +175,22 @@ switch (MO.getType()) { case MachineOperand::MO_Register: { - const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg()); + unsigned Reg = MO.getReg(); + + // If this is a VSX instruction that uses a 64-bit Altivec register (we use + // VFRC to represent 64-bit Altivec registers), we need to upgrade VFRC to + // VRRC (the full 128-bit Altivec register). + // (Please keep this in sync with PPCInstPrinter::printOperand.) + if (isVFRegister(Reg) && + MI->getDesc().TSFlags & PPCII::VsxUseAltivecReg) { + const TargetRegisterClass *VRRC = + Subtarget->getRegisterInfo()->getRegClass(PPC::VRRCRegClassID); + Reg = Subtarget->getRegisterInfo() + ->getMatchingSuperReg(Reg, PPC::sub_64, VRRC); + assert(Reg && "VFRC's super register should include VRRC"); + } + const char *RegName = PPCInstPrinter::getRegisterName(Reg); + // Linux assembler (Others?) does not take register mnemonics. // FIXME - What about special registers used in mfspr/mtspr? if (!Subtarget->isDarwin()) Index: lib/Target/PowerPC/PPCInstrFormats.td =================================================================== --- lib/Target/PowerPC/PPCInstrFormats.td +++ lib/Target/PowerPC/PPCInstrFormats.td @@ -38,6 +38,10 @@ let TSFlags{2} = PPC970_Cracked; let TSFlags{5-3} = PPC970_Unit; + /// Vsx instruction which uses altivec register + bits<1> VsxUseAltivecReg = 0; + let TSFlags{6} = VsxUseAltivecReg; + // Fields used for relation models. 
string BaseName = ""; Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -61,6 +61,14 @@ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit PPC970_BRU = 7 << PPC970_Shift // Branch Unit }; + +enum { + // Shift count to bypass PPC970 flags + NewDef_Shift = 6, + + // Vsx instruction which uses altivec register + VsxUseAltivecReg = 0x1 << NewDef_Shift +}; } // end namespace PPCII class PPCSubtarget; Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -47,6 +47,13 @@ let ParserMatchClass = PPCRegVSSRCAsmOperand; } +def PPCRegVFRCAsmOperand : AsmOperandClass { + let Name = "RegVFRC"; let PredicateMethod = "isRegNumber"; +} +def vfrc : RegisterOperand { + let ParserMatchClass = PPCRegVFRCAsmOperand; +} + // Little-endian-specific nodes. 
def SDT_PPClxvd2x : SDTypeProfile<1, 1, [ SDTCisVT<0, v2f64>, SDTCisPtrTy<1> @@ -2090,12 +2097,14 @@ // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), "lxv $XT, $src", IIC_LdStLFD, []>; + let VsxUseAltivecReg = 1 in { // Load DWord - def LXSD : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src), + def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src), "lxsd $vD, $src", IIC_LdStLFD, []>; // Load SP from src, convert it to DP, and place in dword[0] - def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src), + def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src), "lxssp $vD, $src", IIC_LdStLFD, []>; + } // end VsxUseAltivecReg // [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different // "out" and "in" dag @@ -2127,12 +2136,14 @@ // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), "stxv $XT, $dst", IIC_LdStSTFD, []>; + let VsxUseAltivecReg = 1 in { // Store DWord - def STXSD : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst), + def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst), "stxsd $vS, $dst", IIC_LdStSTFD, []>; // Convert DP of dword[0] to SP, and Store to dst - def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst), + def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst), "stxssp $vS, $dst", IIC_LdStSTFD, []>; + } // end VsxUseAltivecReg // [PO S RA RB XO SX] class X_XS6_RA5_RB5 opcode, bits<10> xo, string opc, @@ -2155,4 +2166,16 @@ def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>; } // end mayStore + + // Prefer Power9 (aka Power v.3 instructions) + let AddedComplexity = 500 in { + def : Pat<(f64 (load iaddr:$src)), (LXSD iaddr:$src)>; + def : Pat<(f32 (load iaddr:$src)), + (COPY_TO_REGCLASS (LXSSP iaddr:$src), VFRC)>; + def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (LXSSP iaddr:$src), VFRC)>; + def : Pat<(store f64:$vS, iaddr:$dst), 
(STXSD $vS, iaddr:$dst)>; + def : Pat<(store f32:$vS, iaddr:$dst), + (STXSSP (COPY_TO_REGCLASS $vS, VFRC), iaddr:$dst)>; + } } // end HasP9Vector Index: test/CodeGen/PowerPC/dform-test.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/dform-test.ll @@ -0,0 +1,201 @@ +; RUN: llc < %s -march=ppc64 -mcpu=pwr9 -o - | FileCheck %s --check-prefix=PWR9 --check-prefix=CHECK +; RUN: llc < %s -march=ppc64 -mcpu=pwr8 -o - | FileCheck %s --check-prefix=PWR8 --check-prefix=CHECK + +; CHECK-LABEL: LXSD: +define void @LXSD(i32 zeroext %N) { +entry: + %cmp17 = icmp eq i32 %N, 0 + br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; v0 = vsx32 +; PWR9-DAG: lxsd 0 +; PWR9-DAG: lxsd 2 +; PWR9-DAG: lxsd 3 +; PWR9-DAG: lxsd 4 +; PWR9-DAG: lxssp 5 +; PWR9-DAG: xxmrghd 34, 37, 34 +; PWR9-DAG: xxmrghd 35, 37, 35 +; PWR9-DAG: xxmrghd 36, 37, 36 +; PWR9-DAG: xxmrghd 37, 37, 32 +; PWR8-NOT: lxsd{{[^x]}} +; PWR8-NOT: lxssp{{[^x]}} + %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call double* @getDoublePtr() + %0 = load double, double* %call, align 8 + %vecinit = insertelement <2 x double> , double %0, i32 1 + %arrayidx1 = getelementptr inbounds double, double* %call, i64 1 + %1 = load double, double* %arrayidx1, align 8 + %vecinit2 = insertelement <2 x double> , double %1, i32 1 + %arrayidx3 = getelementptr inbounds double, double* %call, i64 2 + %2 = load double, double* %arrayidx3, align 8 + %vecinit4 = insertelement <2 x double> , double %2, i32 1 + %arrayidx5 = getelementptr inbounds double, double* %call, i64 3 + %3 = load double, double* %arrayidx5, align 8 + %vecinit6 = insertelement <2 x double> , double %3, i32 
1 + tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit2, <2 x double> %vecinit4, <2 x double> %vecinit6) + %inc = add nuw nsw i32 %i.018, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare double* @getDoublePtr() + +; CHECK-LABEL: LXSSP: +define void @LXSSP(i32 zeroext %N) #0 { +entry: + %cmp20 = icmp eq i32 %N, 0 + br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body + +; PWR9-DAG: lxssp 0 +; PWR9-DAG: lxssp 2 +; PWR9-DAG: lxssp 3 +; PWR9-DAG: lxssp 4 +; PWR9-DAG: lxssp 5 +; PWR9-DAG: xxmrghd 34, 37, 34 +; PWR9-DAG: xxmrghd 35, 37, 35 +; PWR9-DAG: xxmrghd 36, 37, 36 +; PWR9-DAG: xxmrghd 37, 37, 32 +; PWR8-NOT: lxssp{{[^x]}} + %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call float* @getFloatPtr() + %0 = load float, float* %call, align 4 + %conv = fpext float %0 to double + %vecinit = insertelement <2 x double> , double %conv, i32 1 + %arrayidx1 = getelementptr inbounds float, float* %call, i64 1 + %1 = load float, float* %arrayidx1, align 4 + %conv2 = fpext float %1 to double + %vecinit3 = insertelement <2 x double> , double %conv2, i32 1 + %arrayidx4 = getelementptr inbounds float, float* %call, i64 2 + %2 = load float, float* %arrayidx4, align 4 + %conv5 = fpext float %2 to double + %vecinit6 = insertelement <2 x double> , double %conv5, i32 1 + %arrayidx7 = getelementptr inbounds float, float* %call, i64 3 + %3 = load float, float* %arrayidx7, align 4 + %conv8 = fpext float %3 to double + %vecinit9 = insertelement <2 x double> , double %conv8, i32 1 + tail call void @passVSX(<2 x double> %vecinit, <2 x double> %vecinit3, <2 x double> %vecinit6, <2 x double> 
%vecinit9) + %inc = add nuw nsw i32 %i.021, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare float* @getFloatPtr() + +; CHECK-LABEL: STXSD +define void @STXSD(i32 zeroext %N) { +entry: + %cmp17 = icmp eq i32 %N, 0 + br i1 %cmp17, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; PWR9: stxsd{{[^x]}} +; PWR9: stxsd{{[^x]}} +; PWR9: stxsd{{[^x]}} +; PWR9: stxsd{{[^x]}} +; PWR8-NOT: stxsd{{[^x]}} + %i.018 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call double* @getDoublePtr() + %call1 = tail call <2 x double> @getVSX() + %vecext = extractelement <2 x double> %call1, i32 0 + store double %vecext, double* %call, align 8 + %call2 = tail call <2 x double> @getVSX() + %vecext3 = extractelement <2 x double> %call2, i32 0 + %arrayidx4 = getelementptr inbounds double, double* %call, i64 1 + store double %vecext3, double* %arrayidx4, align 8 + %call5 = tail call <2 x double> @getVSX() + %vecext6 = extractelement <2 x double> %call5, i32 0 + %arrayidx7 = getelementptr inbounds double, double* %call, i64 2 + store double %vecext6, double* %arrayidx7, align 8 + %call8 = tail call <2 x double> @getVSX() + %vecext9 = extractelement <2 x double> %call8, i32 0 + %arrayidx10 = getelementptr inbounds double, double* %call, i64 3 + store double %vecext9, double* %arrayidx10, align 8 + %inc = add nuw nsw i32 %i.018, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare <2 x double> @getVSX() + +define void @STXSSP(i32 zeroext %N) { +entry: + %cmp20 = icmp eq i32 %N, 0 + br i1 %cmp20, label %for.cond.cleanup, label %for.body.preheader + 
+for.body.preheader: ; preds = %entry + br label %for.body + +for.cond.cleanup.loopexit: ; preds = %for.body + br label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry + ret void + +for.body: ; preds = %for.body.preheader, %for.body +; v19 = vsx51 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR9: xxlor 51 +; PWR9: stxssp 19 +; PWR8-NOT: stxssp{{[^x]}} + %i.021 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %call = tail call float* @getFloatPtr() + %call1 = tail call <2 x double> @getVSX() + %vecext = extractelement <2 x double> %call1, i32 0 + %conv = fptrunc double %vecext to float + store float %conv, float* %call, align 4 + %call2 = tail call <2 x double> @getVSX() + %vecext3 = extractelement <2 x double> %call2, i32 0 + %conv4 = fptrunc double %vecext3 to float + %arrayidx5 = getelementptr inbounds float, float* %call, i64 1 + store float %conv4, float* %arrayidx5, align 4 + %call6 = tail call <2 x double> @getVSX() + %vecext7 = extractelement <2 x double> %call6, i32 0 + %conv8 = fptrunc double %vecext7 to float + %arrayidx9 = getelementptr inbounds float, float* %call, i64 2 + store float %conv8, float* %arrayidx9, align 4 + %call10 = tail call <2 x double> @getVSX() + %vecext11 = extractelement <2 x double> %call10, i32 0 + %conv12 = fptrunc double %vecext11 to float + %arrayidx13 = getelementptr inbounds float, float* %call, i64 3 + store float %conv12, float* %arrayidx13, align 4 + %inc = add nuw nsw i32 %i.021, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body +} + +declare void @passVSX(<2 x double>, <2 x double>, <2 x double>, <2 x double>)