Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -797,7 +797,9 @@ setOperationAction(ISD::FDIV, MVT::f128, Legal); setOperationAction(ISD::FMUL, MVT::f128, Legal); setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal); - setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); + // No extending loads to f128 on PPC. + for (MVT FT : MVT::fp_valuetypes()) + setLoadExtAction(ISD::EXTLOAD, MVT::f128, FT, Expand); setOperationAction(ISD::FMA, MVT::f128, Legal); setOperationAction(ISD::FP_ROUND, MVT::f64, Legal); setOperationAction(ISD::FP_ROUND, MVT::f32, Legal); @@ -13729,6 +13731,9 @@ bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const { assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && "invalid fpext types"); + // Extending to float128 is not free. + if (DestVT == MVT::f128) + return false; return true; } Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -2531,8 +2531,8 @@ // Quad-Precision Floating-Point Conversion Instructions: // Convert DP -> QP - def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, []>; - def : Pat<(f128 (fpextend f64:$src)), (f128 (XSCVDPQP $src))>; + def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, + [(set f128:$vT, (fpextend f64:$vB))]>; // Round & Convert QP -> DP (dword[1] is set to zero) def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>; @@ -3380,6 +3380,11 @@ // Round & Convert QP -> DP/SP def : Pat<(f64 (fpround f128:$src)), (f64 (XSCVQPDP $src))>; def : Pat<(f32 (fpround f128:$src)), (f32 (XSRSP (XSCVQPDPO $src)))>; + + // Convert SP -> QP + def : Pat<(f128 (fpextend f32:$src)), + (f128 (XSCVDPQP (COPY_TO_REGCLASS $src, VFRC)))>; + } // end HasP9Vector, AddedComplexity let Predicates = [HasP9Vector] in { Index: test/CodeGen/PowerPC/f128-arith.ll =================================================================== --- test/CodeGen/PowerPC/f128-arith.ll +++ test/CodeGen/PowerPC/f128-arith.ll @@ -148,30 +148,3 @@ ; CHECK: stxv ; CHECK: blr } - -; Function Attrs: norecurse nounwind -define void @dpConv2qp(double* nocapture readonly %a, fp128* nocapture %res) { -entry: - %0 = load double, double* %a, align 8 - %conv = fpext double %0 to fp128 - store fp128 %conv, fp128* %res, align 16 - ret void -; CHECK-LABEL: dpConv2qp -; CHECK-NOT: bl __extenddftf2 -; CHECK: lxsd -; CHECK: xscvdpqp -; CHECK: blr -} - -; Function Attrs: norecurse nounwind -define void @dpConv2qp_02(double %a, fp128* nocapture %res) { -entry: - %conv = fpext double %a to fp128 - store fp128 %conv, fp128* %res, align 16 - ret void -; CHECK-LABEL: dpConv2qp_02 -; CHECK-NOT: bl __extenddftf2 -; CHECK: xxlor -; CHECK: xscvdpqp -; CHECK: blr -} Index: test/CodeGen/PowerPC/f128-conv.ll =================================================================== --- test/CodeGen/PowerPC/f128-conv.ll +++ test/CodeGen/PowerPC/f128-conv.ll @@ -552,3 +552,165 @@ store float %conv, float* %res, align 4 ret void } + +@f128Glob = common global fp128 0xL00000000000000000000000000000000, align 16 + +; Function Attrs: norecurse nounwind readnone +define fp128 @dpConv2qp(double %a) { +; CHECK-LABEL: dpConv2qp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor 2, 1, 1 +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: blr +entry: + %conv = fpext double %a to fp128 + ret fp128 %conv +} + +; Function Attrs: norecurse nounwind +define void @dpConv2qp_02(double* nocapture readonly %a) { +; CHECK-LABEL: dpConv2qp_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxsd 2, 0(3) +; CHECK-NEXT: addis 3, 2, .LC8@toc@ha +; CHECK-NEXT: ld 3, .LC8@toc@l(3) +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxvx 2, 0, 3 +; CHECK-NEXT: blr +entry: + %0 = load double, double* %a, align 8 + %conv = fpext double %0 to fp128 + store fp128 %conv, fp128* @f128Glob, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @dpConv2qp_02b(double* nocapture readonly %a, i32 signext %idx) { +; CHECK-LABEL: dpConv2qp_02b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi 4, 4, 3 +; CHECK-NEXT: lxsdx 2, 3, 4 +; CHECK-NEXT: addis 3, 2, .LC8@toc@ha +; CHECK-NEXT: ld 3, .LC8@toc@l(3) +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxvx 2, 0, 3 +; CHECK-NEXT: blr +entry: + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds double, double* %a, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %conv = fpext double %0 to fp128 + store fp128 %conv, fp128* @f128Glob, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @dpConv2qp_03(fp128* nocapture %res, i32 signext %idx, double %a) { +; CHECK-LABEL: dpConv2qp_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor 2, 1, 1 +; CHECK-NEXT: sldi 4, 4, 4 +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxvx 2, 3, 4 +; CHECK-NEXT: blr +entry: + %conv = fpext double %a to fp128 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds fp128, fp128* %res, i64 %idxprom + store fp128 %conv, fp128* %arrayidx, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @dpConv2qp_04(double %a, fp128* nocapture %res) { +; CHECK-LABEL: dpConv2qp_04: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor 2, 1, 1 +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxv 2, 0(4) +; CHECK-NEXT: blr +entry: + %conv = fpext double %a to fp128 + store fp128 %conv, fp128* %res, align 16 + ret void +} + +; Function Attrs: norecurse nounwind readnone +define fp128 @spConv2qp(float %a) { +; CHECK-LABEL: spConv2qp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor 2, 1, 1 +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: blr +entry: + %conv = fpext float %a to fp128 + ret fp128 %conv +} + +; Function Attrs: norecurse nounwind +define void @spConv2qp_02(float* nocapture readonly %a) { +; CHECK-LABEL: spConv2qp_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxssp 2, 0(3) +; CHECK-NEXT: addis 3, 2, .LC8@toc@ha +; CHECK-NEXT: ld 3, .LC8@toc@l(3) +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxvx 2, 0, 3 +; CHECK-NEXT: blr +entry: + %0 = load float, float* %a, align 4 + %conv = fpext float %0 to fp128 + store fp128 %conv, fp128* @f128Glob, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @spConv2qp_02b(float* nocapture readonly %a, i32 signext %idx) { +; CHECK-LABEL: spConv2qp_02b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi 4, 4, 2 +; CHECK-NEXT: lxsspx 2, 3, 4 +; CHECK-NEXT: addis 3, 2, .LC8@toc@ha +; CHECK-NEXT: ld 3, .LC8@toc@l(3) +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxvx 2, 0, 3 +; CHECK-NEXT: blr +entry: + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom + %0 = load float, float* %arrayidx, align 4 + %conv = fpext float %0 to fp128 + store fp128 %conv, fp128* @f128Glob, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @spConv2qp_03(fp128* nocapture %res, i32 signext %idx, float %a) { +; CHECK-LABEL: spConv2qp_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor 2, 1, 1 +; CHECK-NEXT: sldi 4, 4, 4 +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxvx 2, 3, 4 +; CHECK-NEXT: blr +entry: + %conv = fpext float %a to fp128 + %idxprom = sext i32 %idx to i64 + %arrayidx = getelementptr inbounds fp128, fp128* %res, i64 %idxprom + store fp128 %conv, fp128* %arrayidx, align 16 + ret void +} + +; Function Attrs: norecurse nounwind +define void @spConv2qp_04(float %a, fp128* nocapture %res) { +; CHECK-LABEL: spConv2qp_04: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlor 2, 1, 1 +; CHECK-NEXT: xscvdpqp 2, 2 +; CHECK-NEXT: stxv 2, 0(4) +; CHECK-NEXT: blr +entry: + %conv = fpext float %a to fp128 + store fp128 %conv, fp128* %res, align 16 + ret void +}