Index: lib/Target/PowerPC/P9InstrResources.td
===================================================================
--- lib/Target/PowerPC/P9InstrResources.td
+++ lib/Target/PowerPC/P9InstrResources.td
@@ -515,7 +515,8 @@
     XSRSQRTESP,
     XSSUBDP,
     XSSUBSP,
-    XSCVDPSPN
+    XSCVDPSPN,
+    XSRSP
 )>;
 
 // Three Cycle PM operation. Only one PM unit per superslice so we use the whole
Index: lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- lib/Target/PowerPC/PPCISelLowering.cpp
+++ lib/Target/PowerPC/PPCISelLowering.cpp
@@ -799,6 +799,10 @@
         setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
         setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
         setOperationAction(ISD::FMA, MVT::f128, Legal);
+        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
+        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
+        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
       }
     }
 
Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -437,6 +437,9 @@
                        (outs vsfrc:$XT), (ins vsfrc:$XB),
                        "xsredp $XT, $XB", IIC_VecFP,
                        [(set f64:$XT, (PPCfre f64:$XB))]>;
+  def XSRSP : XX2Form<60, 281,
+                      (outs vssrc:$XT), (ins vsfrc:$XB),
+                      "xsrsp $XT, $XB", IIC_VecFP, []>;
   def XSRSQRTEDP : XX2Form<60, 74,
                            (outs vsfrc:$XT), (ins vsfrc:$XB),
                            "xsrsqrtedp $XT, $XB", IIC_VecFP,
@@ -2367,6 +2370,17 @@
     : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
                     !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+  // [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_VFSR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                           list<dag> pattern>
+    : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
+                    !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+
+  // [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
+  class X_VT5_XO5_VB5_VFSR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
+                              list<dag> pattern>
+    : X_VT5_XO5_VB5_VFSR<opcode, xo2, xo, opc, pattern>, isDOT;
+
   let UseVSXReg = 1 in {
   // [PO T XO B XO BX /]
   class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
                         list<dag> pattern>
@@ -2518,8 +2532,8 @@
   def : Pat<(f128 (fpextend f64:$src)), (f128 (XSCVDPQP $src))>;
 
   // Round & Convert QP -> DP (dword[1] is set to zero)
-  def XSCVQPDP  : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>;
-  def XSCVQPDPO : X_VT5_XO5_VB5_Ro<63, 20, 836, "xscvqpdpo", []>;
+  def XSCVQPDP  : X_VT5_XO5_VB5_VFSR<63, 20, 836, "xscvqpdp" , []>;
+  def XSCVQPDPO : X_VT5_XO5_VB5_VFSR_Ro<63, 20, 836, "xscvqpdpo", []>;
 
   // Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
   def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
@@ -3359,7 +3373,22 @@
               (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), xoaddr:$dst, 1),
             (STXSIBX (XSCVDPUXWS f64:$src), xoaddr:$dst)>;
+  // Round & Convert QP -> DP
+  def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>;
+  def : Pat<(store (f64 (fpround f128:$src)), xaddr:$dst),
+            (STXSDX (XSCVQPDP f128:$src), xaddr:$dst)>;
+  def : Pat<(store (f64 (fpround f128:$src)), ixaddr:$dst),
+            (STXSD (XSCVQPDP f128:$src), ixaddr:$dst)>;
+
+  // Round & Convert QP -> SP
+  def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>;
+  def : Pat<(store (f32 (fpround f128:$src)), xaddr:$dst),
+            (STXSSPX (XSRSP (XSCVQPDPO f128:$src)), xaddr:$dst)>;
+  def : Pat<(store (f32 (fpround f128:$src)), ixaddr:$dst),
+            (STXSSP (COPY_TO_REGCLASS
+                     (XSRSP (XSCVQPDPO f128:$src)), VFRC), ixaddr:$dst)>;
 } // end HasP9Vector, AddedComplexity
+
 
 let Predicates = [HasP9Vector] in {
   let isPseudo = 1 in {
     let mayStore = 1 in {
Index: test/CodeGen/PowerPC/f128-conv.ll
===================================================================
--- test/CodeGen/PowerPC/f128-conv.ll
+++ test/CodeGen/PowerPC/f128-conv.ll
@@ -398,3 +398,156 @@
 ; CHECK-NEXT: stxv [[CONV]], 0(3)
 ; CHECK-NEXT: blr
 }
+
+; Convert QP to DP
+
+@f128Array = global [4 x fp128]
+  [fp128 0xL00000000000000004004C00000000000,
+   fp128 0xLF000000000000000400808AB851EB851,
+   fp128 0xL5000000000000000400E0C26324C8366,
+   fp128 0xL8000000000000000400A24E2E147AE14], align 16
+@f128global = global fp128 0xL300000000000000040089CA8F5C28F5C, align 16
+
+; Function Attrs: norecurse nounwind readonly
+define double @qpConv2dp(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qpConv2dp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv 2, 0(3)
+; CHECK-NEXT: xscvqpdp 1, 2
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %conv = fptrunc fp128 %0 to double
+  ret double %conv
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpConv2dp_02(double* nocapture %res) {
+; CHECK-LABEL: qpConv2dp_02:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 4, 2, .LC0@toc@ha
+; CHECK-NEXT: ld 4, .LC0@toc@l(4)
+; CHECK-NEXT: lxvx 2, 0, 4
+; CHECK-NEXT: xscvqpdp 2, 2
+; CHECK-NEXT: stxsd 2, 0(3)
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* @f128global, align 16
+  %conv = fptrunc fp128 %0 to double
+  store double %conv, double* %res, align 8
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpConv2dp_03(double* nocapture %res, i32 signext %idx) {
+; CHECK-LABEL: qpConv2dp_03:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 5, 2, .LC1@toc@ha
+; CHECK-NEXT: sldi 4, 4, 3
+; CHECK-NEXT: ld 5, .LC1@toc@l(5)
+; CHECK-NEXT: lxvx 2, 0, 5
+; CHECK-NEXT: xscvqpdp 0, 2
+; CHECK-NEXT: stxsdx 0, 3, 4
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* getelementptr inbounds ([4 x fp128], [4 x fp128]* @f128Array, i64 0, i64 0), align 16
+  %conv = fptrunc fp128 %0 to double
+  %idxprom = sext i32 %idx to i64
+  %arrayidx = getelementptr inbounds double, double* %res, i64 %idxprom
+  store double %conv, double* %arrayidx, align 8
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpConv2dp_04(fp128* nocapture readonly %a, fp128* nocapture readonly %b, double* nocapture %res) {
+; CHECK-LABEL: qpConv2dp_04:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv 2, 0(3)
+; CHECK-NEXT: lxv 3, 0(4)
+; CHECK-NEXT: xsaddqp 2, 2, 3
+; CHECK-NEXT: xscvqpdp 2, 2
+; CHECK-NEXT: stxsd 2, 0(5)
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = load fp128, fp128* %b, align 16
+  %add = fadd fp128 %0, %1
+  %conv = fptrunc fp128 %add to double
+  store double %conv, double* %res, align 8
+  ret void
+}
+
+; Convert QP to SP
+
+; Function Attrs: norecurse nounwind readonly
+define float @qpConv2sp(fp128* nocapture readonly %a) {
+; CHECK-LABEL: qpConv2sp:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv 2, 0(3)
+; CHECK-NEXT: xscvqpdpo 0, 2
+; CHECK-NEXT: xsrsp 1, 0
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %conv = fptrunc fp128 %0 to float
+  ret float %conv
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpConv2sp_02(float* nocapture %res) {
+; CHECK-LABEL: qpConv2sp_02:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 4, 2, .LC0@toc@ha
+; CHECK-NEXT: ld 4, .LC0@toc@l(4)
+; CHECK-NEXT: lxvx 2, 0, 4
+; CHECK-NEXT: xscvqpdpo 0, 2
+; CHECK-NEXT: xsrsp 2, 0
+; CHECK-NEXT: stxssp 2, 0(3)
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* @f128global, align 16
+  %conv = fptrunc fp128 %0 to float
+  store float %conv, float* %res, align 4
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpConv2sp_03(float* nocapture %res, i32 signext %idx) {
+; CHECK-LABEL: qpConv2sp_03:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis 5, 2, .LC1@toc@ha
+; CHECK-NEXT: sldi 4, 4, 2
+; CHECK-NEXT: ld 5, .LC1@toc@l(5)
+; CHECK-NEXT: lxv 2, 48(5)
+; CHECK-NEXT: xscvqpdpo 0, 2
+; CHECK-NEXT: xsrsp 0, 0
+; CHECK-NEXT: stxsspx 0, 3, 4
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* getelementptr inbounds ([4 x fp128], [4 x fp128]* @f128Array, i64 0, i64 3), align 16
+  %conv = fptrunc fp128 %0 to float
+  %idxprom = sext i32 %idx to i64
+  %arrayidx = getelementptr inbounds float, float* %res, i64 %idxprom
+  store float %conv, float* %arrayidx, align 4
+  ret void
+}
+
+; Function Attrs: norecurse nounwind
+define void @qpConv2sp_04(fp128* nocapture readonly %a, fp128* nocapture readonly %b, float* nocapture %res) {
+; CHECK-LABEL: qpConv2sp_04:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lxv 2, 0(3)
+; CHECK-NEXT: lxv 3, 0(4)
+; CHECK-NEXT: xsaddqp 2, 2, 3
+; CHECK-NEXT: xscvqpdpo 0, 2
+; CHECK-NEXT: xsrsp 2, 0
+; CHECK-NEXT: stxssp 2, 0(5)
+; CHECK-NEXT: blr
+entry:
+  %0 = load fp128, fp128* %a, align 16
+  %1 = load fp128, fp128* %b, align 16
+  %add = fadd fp128 %0, %1
+  %conv = fptrunc fp128 %add to float
+  store float %conv, float* %res, align 4
+  ret void
+}