Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -128,6 +128,13 @@ /// Direct move from a GPR to a VSX register (zero) MTVSRZ, + /// Direct move from a GPR to a VSX register (keeping a vector type) + MTVSRDVEC, + + /// Conversions between SP 64-bit values and SP 32-bit values + SP_TO_VEC_SP, + VEC_SP_TO_SP, + // FIXME: Remove these once the ANDI glue bug is fixed: /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the /// eq or gt bit of CR0 after executing andi. x, 1. This is used to Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -542,6 +542,14 @@ if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + if (Subtarget.hasP8Vector()) + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); + if (Subtarget.hasDirectMove()) { + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Custom); + } setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); @@ -1008,6 +1016,9 @@ case PPCISD::MFVSR: return "PPCISD::MFVSR"; case PPCISD::MTVSRA: return "PPCISD::MTVSRA"; case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ"; + case PPCISD::MTVSRDVEC: return "PPCISD::MTVSRDVEC"; + case PPCISD::SP_TO_VEC_SP: return "PPCISD::SP_TO_VEC_SP"; + case PPCISD::VEC_SP_TO_SP: return "PPCISD::VEC_SP_TO_SP"; case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT"; case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT"; case PPCISD::VCMP: return "PPCISD::VCMP"; @@ -7488,6 +7499,13 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + // With direct moves, we can build integral vectors without store/load + if (Subtarget.hasDirectMove()) { + SDValue Move = DAG.getNode(PPCISD::MTVSRDVEC, dl, Op.getValueType(), + Op.getOperand(0)); + return Move; + } + // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); @@ -11469,7 +11487,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles( EVT VT , unsigned DefinedValues) const { if (VT == MVT::v2i64) - return false; + return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves if (Subtarget.hasQPX()) { if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1) Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -66,6 +66,9 @@ def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>; +def PPCSPtoVecSP : SDNode<"PPCISD::SP_TO_VEC_SP", SDTUnaryOp, []>; +def PPCVecSPtoSP : SDNode<"PPCISD::VEC_SP_TO_SP", SDTUnaryOp, []>; +def PPCmtvsrdVec : SDNode<"PPCISD::MTVSRDVEC", SDTUnaryOp, []>; multiclass XX3Form_Rcr opcode, bits<7> xo, dag OOL, dag IOL, string asmbase, string asmstr, InstrItinClass itin, @@ -1181,6 +1184,25 @@ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">, AltVSXFMARel; } + + // Single Precision Conversions (FP <-> INT) + def XSCVSXDSP : XX2Form<60, 312, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvsxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfids f64:$XB))]>; + def XSCVUXDSP : XX2Form<60, 296, + (outs vssrc:$XT), (ins vsfrc:$XB), + "xscvuxdsp $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCfcfidus f64:$XB))]>; + + // Conversions between vector and scalar single precision + def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB), + "xscvdpspn $XT, $XB", IIC_VecFP, + [(set v4f32:$XT, (PPCSPtoVecSP f32:$XB))]>; + def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB), + "xscvspdpn $XT, $XB", IIC_VecFP, + [(set f32:$XT, (PPCVecSPtoSP v4f32:$XB))]>; + } // AddedComplexity = 400 } // HasP8Vector @@ -1204,3 +1226,54 @@ "mtvsrwz $XT, $rA", IIC_VecGeneral, [(set f64:$XT, (PPCmtvsrz i32:$rA))]>; } // HasDirectMove, HasVSX + +/* Direct moves of various size entities from GPR's into VSR's. Each lines + the value up into element 0 (both BE and LE). Namely, entities smaller than + a doubleword are shifted left and moved for BE. For LE, they're moved, then + swapped to go into the least significant element of the VSR. +*/ +def Moves { + dag BE_BYTE = (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7)); + dag BE_HALF = (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15)); + dag BE_WORD = (MTVSRD + (RLDICR + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31)); + dag DBLWORD = (MTVSRD $A); + + dag LE_MVW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32)); + dag LE_CPW = (v2i64 (COPY_TO_REGCLASS LE_MVW, VSRC)); + dag LE_BHW = (XXPERMDI LE_CPW, LE_CPW, 2); + dag LE_CPD = (v2i64 (COPY_TO_REGCLASS DBLWORD, VSRC)); + dag LE_DBL = (XXPERMDI LE_CPD, LE_CPD, 2); +} + +let Predicates = [IsBigEndian, HasDirectMove] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XSCVDPSPN $A))>; + def : Pat<(v16i8 (PPCmtvsrdVec i32:$A)), + (v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE, VSRC))>; + def : Pat<(v8i16 (PPCmtvsrdVec i32:$A)), + (v8i16 (COPY_TO_REGCLASS Moves.BE_HALF, VSRC))>; + def : Pat<(v4i32 (PPCmtvsrdVec i32:$A)), + (v4i32 (COPY_TO_REGCLASS Moves.BE_WORD, VSRC))>; + def : Pat<(v2i64 (PPCmtvsrdVec i64:$A)), + (v2i64 (COPY_TO_REGCLASS Moves.DBLWORD, VSRC))>; +} // IsBigEndian + +let Predicates = [IsLittleEndian, HasDirectMove] in { + def : Pat<(v4f32 (scalar_to_vector f32:$A)), + (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>; + def : Pat<(v16i8 (PPCmtvsrdVec i32:$A)), + (v16i8 (COPY_TO_REGCLASS Moves.LE_BHW, VSRC))>; + def : Pat<(v8i16 (PPCmtvsrdVec i32:$A)), + (v8i16 (COPY_TO_REGCLASS Moves.LE_BHW, VSRC))>; + def : Pat<(v4i32 (PPCmtvsrdVec i32:$A)), + (v4i32 (COPY_TO_REGCLASS Moves.LE_BHW, VSRC))>; + def : Pat<(v2i64 (PPCmtvsrdVec i64:$A)), + (v2i64 Moves.LE_DBL)>; +} // IsLittleEndian + Index: lib/Target/PowerPC/PPCVSXCopy.cpp =================================================================== --- lib/Target/PowerPC/PPCVSXCopy.cpp +++ lib/Target/PowerPC/PPCVSXCopy.cpp @@ -77,6 +77,10 @@ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI); } + bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) { + return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI); + } + protected: bool processBlock(MachineBasicBlock &MBB) { bool Changed = false; @@ -100,7 +104,8 @@ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass : &PPC::VSLRCRegClass; assert((IsF8Reg(SrcMO.getReg(), MRI) || - IsVRReg(SrcMO.getReg(), MRI)) && + IsVRReg(SrcMO.getReg(), MRI) || + IsVSFReg(SrcMO.getReg(), MRI)) && "Unknown source for a VSX copy"); unsigned NewVReg = MRI.createVirtualRegister(SrcRC); Index: test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll =================================================================== --- test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll +++ test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll @@ -24,8 +24,7 @@ ret float %conv ; CHECK-LABEL: @_Z6testfcc ; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG01]] +; CHECK: xscvuxdsp 1, [[MOVEREG01]] } ; Function Attrs: nounwind @@ -77,8 +76,7 @@ ret float %conv ; CHECK-LABEL: @_Z7testfuch ; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG03]] +; CHECK: xscvuxdsp 1, [[MOVEREG03]] } ; Function Attrs: nounwind @@ -130,8 +128,7 @@ ret float %conv ; CHECK-LABEL: @_Z6testfss ; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3 -; FIXME: Once we have XSCVSXDSP implemented, this will change -; CHECK: fcfids 1, [[MOVEREG05]] +; CHECK: xscvsxdsp 1, [[MOVEREG05]] } ; Function Attrs: nounwind @@ -183,8 +180,7 @@ ret float %conv ; CHECK-LABEL: @_Z7testfust ; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG07]] +; CHECK: xscvuxdsp 1, [[MOVEREG07]] } ; Function Attrs: nounwind @@ -236,8 +232,7 @@ ret float %conv ; CHECK-LABEL: @_Z6testfii ; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3 -; FIXME: Once we have XSCVSXDSP implemented, this will change -; CHECK: fcfids 1, [[MOVEREG09]] +; CHECK: xscvsxdsp 1, [[MOVEREG09]] } ; Function Attrs: nounwind @@ -289,8 +284,7 @@ ret float %conv ; CHECK-LABEL: @_Z7testfuij ; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG11]] +; CHECK: xscvuxdsp 1, [[MOVEREG11]] } ; Function Attrs: nounwind @@ -342,8 +336,7 @@ ret float %conv ; CHECK-LABEL:@_Z7testfllx ; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3 -; FIXME: Once we have XSCVSXDSP implemented, this will change -; CHECK: fcfids 1, [[MOVEREG13]] +; CHECK: xscvsxdsp 1, [[MOVEREG13]] } ; Function Attrs: nounwind @@ -395,8 +388,7 @@ ret float %conv ; CHECK-LABEL: @_Z8testfully ; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3 -; FIXME: Once we have XSCVUXDSP implemented, this will change -; CHECK: fcfidus 1, [[MOVEREG15]] +; CHECK: xscvuxdsp 1, [[MOVEREG15]] } ; Function Attrs: nounwind Index: test/CodeGen/PowerPC/vsx.ll =================================================================== --- test/CodeGen/PowerPC/vsx.ll +++ test/CodeGen/PowerPC/vsx.ll @@ -1226,11 +1226,11 @@ ; CHECK-FISL: blr ; CHECK-LE-LABEL: @test80 -; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16 +; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3 ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI -; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]] +; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]] ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]] -; CHECK-LE-DAG: xxswapd 34, [[V1]] +; CHECK-LE-DAG: xxspltd 34, [[V1]] ; CHECK-LE-DAG: xxswapd 35, [[V2]] ; CHECK-LE: vaddudm 2, 2, 3 ; CHECK-LE: blr Index: test/CodeGen/PowerPC/vsx_scalar_ld_st.ll =================================================================== --- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll +++ test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -55,8 +55,7 @@ ret void ; CHECK-LABEL: @intToFlt ; CHECK: lxsiwax [[REGLD2:[0-9]+]], -; FIXME: the below will change when the VSX form is implemented -; CHECK: fcfids {{[0-9]}}, [[REGLD2]] +; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]] } ; Function Attrs: nounwind @@ -108,8 +107,7 @@ ret void ; CHECK-LABEL: @uIntToFlt ; CHECK: lxsiwzx [[REGLD4:[0-9]+]], -; FIXME: the below will change when the VSX form is implemented -; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]] +; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]] } ; Function Attrs: nounwind Index: test/MC/Disassembler/PowerPC/vsx.txt =================================================================== --- test/MC/Disassembler/PowerPC/vsx.txt +++ test/MC/Disassembler/PowerPC/vsx.txt @@ -57,6 +57,9 @@ # CHECK: xscvdpsp 7, 27 0xf0 0xe0 0xdc 0x24 +# CHECK: xscvdpspn 7, 27 +0xf0 0xe0 0xdc 0x2c + # CHECK: xscvdpsxds 7, 27 0xf0 0xe0 0xdd 0x60 @@ -72,9 +75,18 @@ # CHECK: xscvspdp 7, 27 0xf0 0xe0 0xdd 0x24 +# CHECK: xscvspdpn 7, 27 +0xf0 0xe0 0xdd 0x2c + +# CHECK: xscvsxdsp 7, 27 +0xf0 0xe0 0xdc 0xe0 + # CHECK: xscvsxddp 7, 27 0xf0 0xe0 0xdd 0xe0 +# CHECK: xscvuxdsp 7, 27 +0xf0 0xe0 0xdc 0xa0 + # CHECK: xscvuxddp 7, 27 0xf0 0xe0 0xdd 0xa0 Index: test/MC/PowerPC/vsx.s =================================================================== --- test/MC/PowerPC/vsx.s +++ test/MC/PowerPC/vsx.s @@ -62,6 +62,9 @@ # CHECK-BE: xscvdpsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0x24] # CHECK-LE: xscvdpsp 7, 27 # encoding: [0x24,0xdc,0xe0,0xf0] xscvdpsp 7, 27 +# CHECK-BE: xscvdpspn 7, 27 # encoding: [0xf0,0xe0,0xdc,0x2c] +# CHECK-LE: xscvdpspn 7, 27 # encoding: [0x2c,0xdc,0xe0,0xf0] + xscvdpspn 7, 27 # CHECK-BE: xscvdpsxds 7, 27 # encoding: [0xf0,0xe0,0xdd,0x60] # CHECK-LE: xscvdpsxds 7, 27 # encoding: [0x60,0xdd,0xe0,0xf0] xscvdpsxds 7, 27 @@ -77,9 +80,18 @@ # CHECK-BE: xscvspdp 7, 27 # encoding: [0xf0,0xe0,0xdd,0x24] # CHECK-LE: xscvspdp 7, 27 # encoding: [0x24,0xdd,0xe0,0xf0] xscvspdp 7, 27 +# CHECK-BE: xscvspdpn 7, 27 # encoding: [0xf0,0xe0,0xdd,0x2c] +# CHECK-LE: xscvspdpn 7, 27 # encoding: [0x2c,0xdd,0xe0,0xf0] + xscvspdpn 7, 27 +# CHECK-BE: xscvsxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xe0] +# CHECK-LE: xscvsxdsp 7, 27 # encoding: [0xe0,0xdc,0xe0,0xf0] + xscvsxdsp 7, 27 # CHECK-BE: xscvsxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xe0] # CHECK-LE: xscvsxddp 7, 27 # encoding: [0xe0,0xdd,0xe0,0xf0] xscvsxddp 7, 27 +# CHECK-BE: xscvuxdsp 7, 27 # encoding: [0xf0,0xe0,0xdc,0xa0] +# CHECK-LE: xscvuxdsp 7, 27 # encoding: [0xa0,0xdc,0xe0,0xf0] + xscvuxdsp 7, 27 # CHECK-BE: xscvuxddp 7, 27 # encoding: [0xf0,0xe0,0xdd,0xa0] # CHECK-LE: xscvuxddp 7, 27 # encoding: [0xa0,0xdd,0xe0,0xf0] xscvuxddp 7, 27