Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -542,6 +542,14 @@
   if (Subtarget.hasVSX()) {
     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+    if (Subtarget.hasP8Vector())
+      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
+    if (Subtarget.hasDirectMove()) {
+      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
+    }
 
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
     setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
@@ -11469,7 +11477,7 @@
 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
   EVT VT , unsigned DefinedValues) const {
   if (VT == MVT::v2i64)
-    return false;
+    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
 
   if (Subtarget.hasQPX()) {
     if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -1181,6 +1181,23 @@
                     RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
                     AltVSXFMARel;
   }
+
+  // Single Precision Conversions (FP <-> INT)
+  def XSCVSXDSP : XX2Form<60, 312,
+                          (outs vssrc:$XT), (ins vsfrc:$XB),
+                          "xscvsxdsp $XT, $XB", IIC_VecFP,
+                          [(set f32:$XT, (PPCfcfids f64:$XB))]>;
+  def XSCVUXDSP : XX2Form<60, 296,
+                          (outs vssrc:$XT), (ins vsfrc:$XB),
+                          "xscvuxdsp $XT, $XB", IIC_VecFP,
+                          [(set f32:$XT, (PPCfcfidus f64:$XB))]>;
+
+  // Conversions between vector and scalar single precision
+  def XSCVDPSPN : XX2Form<60, 267, (outs vsrc:$XT), (ins vssrc:$XB),
+                          "xscvdpspn $XT, $XB", IIC_VecFP, []>;
+  def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
+                          "xscvspdpn $XT, $XB", IIC_VecFP, []>;
+
 } // AddedComplexity = 400
 } // HasP8Vector
@@ -1204,3 +1221,73 @@
                        "mtvsrwz $XT, $rA", IIC_VecGeneral,
                        [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
 } // HasDirectMove, HasVSX
+
+/* Direct moves of various widths from GPRs into VSRs. Each pattern lines
+   the value up into element 0 (both BE and LE). Namely, entities smaller
+   than a doubleword are shifted left and moved for BE. For LE, they're
+   moved, then swapped to go into the least significant element of the VSR.
+*/
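+
+// As a rough illustration of what the dags below expand to (the register
+// numbers are only examples; the allocator assigns the real ones), a
+// word-sized value arriving in r3 reaches element 0 of a v4i32 via:
+//   BE:  sldi 3, 3, 32     (the shift is 56/48/32 for byte/half/word)
+//        mtvsrd 34, 3
+//   LE:  mtvsrd 34, 3
+//        xxswapd 34, 34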
+def Moves {
+  dag BE_BYTE_0 = (MTVSRD
+                    (RLDICR
+                      (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 56, 7));
+  dag BE_HALF_0 = (MTVSRD
+                    (RLDICR
+                      (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 48, 15));
+  dag BE_WORD_0 = (MTVSRD
+                    (RLDICR
+                      (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32), 32, 31));
+  dag DBLWORD_0 = (MTVSRD $A);
+
+  dag LE_MVW = (MTVSRD (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32));
+  dag LE_CPW = (v2i64 (COPY_TO_REGCLASS LE_MVW, VSRC));
+  dag LE_BHW = (XXPERMDI LE_CPW, LE_CPW, 2);
+  dag LE_CPD = (v2i64 (COPY_TO_REGCLASS DBLWORD_0, VSRC));
+  dag LE_DBL = (XXPERMDI LE_CPD, LE_CPD, 2);
+}
+
+let Predicates = [IsBigEndian, HasP8Vector] in {
+  def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+            (v4f32 (XSCVDPSPN $A))>;
+} // IsBigEndian, HasP8Vector
+
+let Predicates = [IsBigEndian, HasDirectMove] in {
+  def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+            (v16i8 (COPY_TO_REGCLASS Moves.BE_BYTE_0, VSRC))>;
+  def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+            (v8i16 (COPY_TO_REGCLASS Moves.BE_HALF_0, VSRC))>;
+  def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+            (v4i32 (COPY_TO_REGCLASS Moves.BE_WORD_0, VSRC))>;
+  def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+            (v2i64 (COPY_TO_REGCLASS Moves.DBLWORD_0, VSRC))>;
+} // IsBigEndian, HasDirectMove
+
+let Predicates = [IsLittleEndian, HasP8Vector] in {
+  def : Pat<(v4f32 (scalar_to_vector f32:$A)),
+            (v4f32 (XXSLDWI (XSCVDPSPN $A), (XSCVDPSPN $A), 1))>;
+} // IsLittleEndian, HasP8Vector
+
+let Predicates = [IsLittleEndian, HasDirectMove] in {
+  def : Pat<(v16i8 (scalar_to_vector i32:$A)),
+            (v16i8 (COPY_TO_REGCLASS Moves.LE_BHW, VSRC))>;
+  def : Pat<(v8i16 (scalar_to_vector i32:$A)),
+            (v8i16 (COPY_TO_REGCLASS Moves.LE_BHW, VSRC))>;
+  def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+            (v4i32 (COPY_TO_REGCLASS Moves.LE_BHW, VSRC))>;
+  def : Pat<(v2i64 (scalar_to_vector i64:$A)),
+            (v2i64 Moves.LE_DBL)>;
+} // IsLittleEndian, HasDirectMove
+
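+// Usage sketch: IR splats like the following (cf. the new test
+// p8-scalar_vector_conversions.ll) now select to the direct-move
+// sequences above instead of going through a stack temporary:
+//   %0 = insertelement <4 x i32> undef, i32 %a, i32 0
+//   %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <4 x i32> zeroinitializer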
Index: lib/Target/PowerPC/PPCVSXCopy.cpp
===================================================================
--- lib/Target/PowerPC/PPCVSXCopy.cpp
+++ lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -77,6 +77,10 @@
     return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
   }
 
+  bool IsVSFReg(unsigned Reg, MachineRegisterInfo &MRI) {
+    return IsRegInClass(Reg, &PPC::VSFRCRegClass, MRI);
+  }
+
 protected:
   bool processBlock(MachineBasicBlock &MBB) {
     bool Changed = false;
@@ -100,7 +104,8 @@
           IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
                                          &PPC::VSLRCRegClass;
         assert((IsF8Reg(SrcMO.getReg(), MRI) ||
-                IsVRReg(SrcMO.getReg(), MRI)) &&
+                IsVRReg(SrcMO.getReg(), MRI) ||
+                IsVSFReg(SrcMO.getReg(), MRI)) &&
                "Unknown source for a VSX copy");
 
         unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
Index: test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
===================================================================
--- test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
+++ test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll
@@ -24,8 +24,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z6testfcc
 ; CHECK: mtvsrwz [[MOVEREG01:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG01]]
+; CHECK: xscvuxdsp 1, [[MOVEREG01]]
 }
 
 ; Function Attrs: nounwind
@@ -77,8 +76,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z7testfuch
 ; CHECK: mtvsrwz [[MOVEREG03:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG03]]
+; CHECK: xscvuxdsp 1, [[MOVEREG03]]
 }
 
 ; Function Attrs: nounwind
@@ -130,8 +128,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z6testfss
 ; CHECK: mtvsrwa [[MOVEREG05:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG05]]
+; CHECK: xscvsxdsp 1, [[MOVEREG05]]
 }
 
 ; Function Attrs: nounwind
@@ -183,8 +180,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z7testfust
 ; CHECK: mtvsrwz [[MOVEREG07:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG07]]
+; CHECK: xscvuxdsp 1, [[MOVEREG07]]
 }
 
 ; Function Attrs: nounwind
@@ -236,8 +232,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z6testfii
 ; CHECK: mtvsrwa [[MOVEREG09:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG09]]
+; CHECK: xscvsxdsp 1, [[MOVEREG09]]
 }
 
 ; Function Attrs: nounwind
@@ -289,8 +284,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z7testfuij
 ; CHECK: mtvsrwz [[MOVEREG11:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG11]]
+; CHECK: xscvuxdsp 1, [[MOVEREG11]]
 }
 
 ; Function Attrs: nounwind
@@ -342,8 +336,7 @@
   ret float %conv
 ; CHECK-LABEL:@_Z7testfllx
 ; CHECK: mtvsrd [[MOVEREG13:[0-9]+]], 3
-; FIXME: Once we have XSCVSXDSP implemented, this will change
-; CHECK: fcfids 1, [[MOVEREG13]]
+; CHECK: xscvsxdsp 1, [[MOVEREG13]]
 }
 
 ; Function Attrs: nounwind
@@ -395,8 +388,7 @@
   ret float %conv
 ; CHECK-LABEL: @_Z8testfully
 ; CHECK: mtvsrd [[MOVEREG15:[0-9]+]], 3
-; FIXME: Once we have XSCVUXDSP implemented, this will change
-; CHECK: fcfidus 1, [[MOVEREG15]]
+; CHECK: xscvuxdsp 1, [[MOVEREG15]]
 }
 
 ; Function Attrs: nounwind
Index: test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
===================================================================
--- test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
+++ test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-LE
+
+; Function Attrs: nounwind
+define <16 x i8> @buildc(i8 zeroext %a) {
+entry:
+  %a.addr = alloca i8, align 1
+  store i8 %a, i8* %a.addr, align 1
+  %0 = load i8, i8* %a.addr, align 1
+  %splat.splatinsert = insertelement <16 x i8> undef, i8 %0, i32 0
+  %splat.splat = shufflevector <16 x i8> %splat.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
+  ret <16 x i8> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 56
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <8 x i16> @builds(i16 zeroext %a) {
+entry:
+  %a.addr = alloca i16, align 2
+  store i16 %a, i16* %a.addr, align 2
+  %0 = load i16, i16* %a.addr, align 2
+  %splat.splatinsert = insertelement <8 x i16> undef, i16 %0, i32 0
+  %splat.splat = shufflevector <8 x i16> %splat.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
+  ret <8 x i16> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 48
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <4 x i32> @buildi(i32 zeroext %a) {
+entry:
+  %a.addr = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
+  %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
+  ret <4 x i32> %splat.splat
+; CHECK: sldi [[REG1:[0-9]+]], 3, 32
+; CHECK: mtvsrd {{[0-9]+}}, [[REG1]]
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <2 x i64> @buildl(i64 %a) {
+entry:
+  %a.addr = alloca i64, align 8
+  store i64 %a, i64* %a.addr, align 8
+  %0 = load i64, i64* %a.addr, align 8
+  %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
+  %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
+  ret <2 x i64> %splat.splat
+; CHECK: mtvsrd {{[0-9]+}}, 3
+; CHECK-LE: mtvsrd [[REG1:[0-9]+]], 3
+; CHECK-LE: xxswapd {{[0-9]+}}, [[REG1]]
+}
+
+; Function Attrs: nounwind
+define <4 x float> @buildf(float %a) {
+entry:
+  %a.addr = alloca float, align 4
+  store float %a, float* %a.addr, align 4
+  %0 = load float, float* %a.addr, align 4
+  %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  ret <4 x float> %splat.splat
+; CHECK: xscvdpspn {{[0-9]+}}, 1
+; CHECK-LE: xscvdpspn [[REG1:[0-9]+]], 1
+; CHECK-LE: xxsldwi {{[0-9]+}}, [[REG1]], [[REG1]], 1
+}
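+
+; A note on the CHECK-LE lines in @buildf above: xscvdpspn leaves the
+; converted single in word 0 of the target VSX register, which is element
+; 3 in little-endian element order, so the one-word xxsldwi rotation is
+; what brings it around to element 0. (This is our reading of the pattern;
+; the BE case can use the xscvdpspn result directly.)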
Index: test/CodeGen/PowerPC/vsx.ll
===================================================================
--- test/CodeGen/PowerPC/vsx.ll
+++ test/CodeGen/PowerPC/vsx.ll
@@ -1226,11 +1226,11 @@
 ; CHECK-FISL: blr
 
 ; CHECK-LE-LABEL: @test80
-; CHECK-LE-DAG: addi [[R1:[0-9]+]], 1, -16
+; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
 ; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
-; CHECK-LE-DAG: lxvd2x [[V1:[0-9]+]], 0, [[R1]]
+; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
 ; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxswapd 34, [[V1]]
+; CHECK-LE-DAG: xxspltd 34, [[V1]]
 ; CHECK-LE-DAG: xxswapd 35, [[V2]]
 ; CHECK-LE: vaddudm 2, 2, 3
 ; CHECK-LE: blr
Index: test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
===================================================================
--- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
+++ test/CodeGen/PowerPC/vsx_scalar_ld_st.ll
@@ -55,8 +55,7 @@
   ret void
 ; CHECK-LABEL: @intToFlt
 ; CHECK: lxsiwax [[REGLD2:[0-9]+]],
-; FIXME: the below will change when the VSX form is implemented
-; CHECK: fcfids {{[0-9]}}, [[REGLD2]]
+; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]]
 }
 
 ; Function Attrs: nounwind
@@ -108,8 +107,7 @@
   ret void
 ; CHECK-LABEL: @uIntToFlt
 ; CHECK: lxsiwzx [[REGLD4:[0-9]+]],
-; FIXME: the below will change when the VSX form is implemented
-; CHECK: fcfidus {{[0-9]+}}, [[REGLD4]]
+; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]]
 }
 
 ; Function Attrs: nounwind
Index: test/MC/Disassembler/PowerPC/vsx.txt
===================================================================
--- test/MC/Disassembler/PowerPC/vsx.txt
+++ test/MC/Disassembler/PowerPC/vsx.txt
@@ -57,6 +57,9 @@
 # CHECK: xscvdpsp 7, 27
 0xf0 0xe0 0xdc 0x24
 
+# CHECK: xscvdpspn 7, 27
+0xf0 0xe0 0xdc 0x2c
+
 # CHECK: xscvdpsxds 7, 27
 0xf0 0xe0 0xdd 0x60
 
@@ -72,9 +75,18 @@
 # CHECK: xscvspdp 7, 27
 0xf0 0xe0 0xdd 0x24
 
+# CHECK: xscvspdpn 7, 27
+0xf0 0xe0 0xdd 0x2c
+
+# CHECK: xscvsxdsp 7, 27
+0xf0 0xe0 0xdc 0xe0
+
 # CHECK: xscvsxddp 7, 27
 0xf0 0xe0 0xdd 0xe0
 
+# CHECK: xscvuxdsp 7, 27
+0xf0 0xe0 0xdc 0xa0
+
 # CHECK: xscvuxddp 7, 27
 0xf0 0xe0 0xdd 0xa0
 
Index: test/MC/PowerPC/vsx.s
===================================================================
--- test/MC/PowerPC/vsx.s
+++ test/MC/PowerPC/vsx.s
@@ -62,6 +62,9 @@
 # CHECK-BE: xscvdpsp 7, 27                  # encoding: [0xf0,0xe0,0xdc,0x24]
 # CHECK-LE: xscvdpsp 7, 27                  # encoding: [0x24,0xdc,0xe0,0xf0]
             xscvdpsp 7, 27
+# CHECK-BE: xscvdpspn 7, 27                 # encoding: [0xf0,0xe0,0xdc,0x2c]
+# CHECK-LE: xscvdpspn 7, 27                 # encoding: [0x2c,0xdc,0xe0,0xf0]
+            xscvdpspn 7, 27
 # CHECK-BE: xscvdpsxds 7, 27                # encoding: [0xf0,0xe0,0xdd,0x60]
 # CHECK-LE: xscvdpsxds 7, 27                # encoding: [0x60,0xdd,0xe0,0xf0]
             xscvdpsxds 7, 27
@@ -77,9 +80,18 @@
 # CHECK-BE: xscvspdp 7, 27                  # encoding: [0xf0,0xe0,0xdd,0x24]
 # CHECK-LE: xscvspdp 7, 27                  # encoding: [0x24,0xdd,0xe0,0xf0]
             xscvspdp 7, 27
+# CHECK-BE: xscvspdpn 7, 27                 # encoding: [0xf0,0xe0,0xdd,0x2c]
+# CHECK-LE: xscvspdpn 7, 27                 # encoding: [0x2c,0xdd,0xe0,0xf0]
+            xscvspdpn 7, 27
+# CHECK-BE: xscvsxdsp 7, 27                 # encoding: [0xf0,0xe0,0xdc,0xe0]
+# CHECK-LE: xscvsxdsp 7, 27                 # encoding: [0xe0,0xdc,0xe0,0xf0]
+            xscvsxdsp 7, 27
 # CHECK-BE: xscvsxddp 7, 27                 # encoding: [0xf0,0xe0,0xdd,0xe0]
 # CHECK-LE: xscvsxddp 7, 27                 # encoding: [0xe0,0xdd,0xe0,0xf0]
             xscvsxddp 7, 27
+# CHECK-BE: xscvuxdsp 7, 27                 # encoding: [0xf0,0xe0,0xdc,0xa0]
+# CHECK-LE: xscvuxdsp 7, 27                 # encoding: [0xa0,0xdc,0xe0,0xf0]
+            xscvuxdsp 7, 27
 # CHECK-BE: xscvuxddp 7, 27                 # encoding: [0xf0,0xe0,0xdd,0xa0]
 # CHECK-LE: xscvuxddp 7, 27                 # encoding: [0xa0,0xdd,0xe0,0xf0]
             xscvuxddp 7, 27