diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -12760,9 +12760,10 @@ SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, const DenormalMode &Mode) const { - // TODO - add support for v2f64/v4f32 + // We only have VSX Vector Test for software Square Root. EVT VT = Op.getValueType(); - if (VT != MVT::f64) + if (VT != MVT::f64 && + ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())) return SDValue(); SDLoc DL(Op); @@ -12788,9 +12789,10 @@ SDValue PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op, SelectionDAG &DAG) const { - // TODO - add support for v2f64/v4f32 + // We only have VSX Vector Square Root. EVT VT = Op.getValueType(); - if (VT != MVT::f64) + if (VT != MVT::f64 && + ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())) return TargetLowering::getSqrtResultForDenormInput(Op, DAG); return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -640,10 +640,12 @@ def XVTSQRTDP : XX2Form_1<60, 234, (outs crrc:$crD), (ins vsrc:$XB), - "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>; + "xvtsqrtdp $crD, $XB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>; def XVTSQRTSP : XX2Form_1<60, 170, (outs crrc:$crD), (ins vsrc:$XB), - "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>; + "xvtsqrtsp $crD, $XB", IIC_FPCompare, + [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>; } def XVDIVDP : XX3Form<60, 120, @@ -2464,6 +2466,8 @@ (XVNMADDASP $C, $A, $B)>; def : Pat<(PPCfsqrt f64:$frA), (XSSQRTDP $frA)>; +def : Pat<(PPCfsqrt v2f64:$frA), (XVSQRTDP $frA)>; +def : Pat<(PPCfsqrt v4f32:$frA), (XVSQRTSP $frA)>; def : Pat<(v2f64 (bitconvert v4f32:$A)), (COPY_TO_REGCLASS $A, VSRC)>; diff --git a/llvm/test/CodeGen/PowerPC/recipest.ll b/llvm/test/CodeGen/PowerPC/recipest.ll --- a/llvm/test/CodeGen/PowerPC/recipest.ll +++ b/llvm/test/CodeGen/PowerPC/recipest.ll @@ -953,24 +953,30 @@ ; ; CHECK-P8-LABEL: hoo3_fmf: ; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: xvtsqrtsp 0, 34 +; CHECK-P8-NEXT: bc 12, 2, .LBB24_2 +; CHECK-P8-NEXT: # %bb.1: ; CHECK-P8-NEXT: xvrsqrtesp 0, 34 ; CHECK-P8-NEXT: addis 3, 2, .LCPI24_0@toc@ha ; CHECK-P8-NEXT: addis 4, 2, .LCPI24_1@toc@ha ; CHECK-P8-NEXT: addi 3, 3, .LCPI24_0@toc@l -; CHECK-P8-NEXT: lvx 3, 0, 3 -; CHECK-P8-NEXT: addi 3, 4, .LCPI24_1@toc@l -; CHECK-P8-NEXT: lvx 4, 0, 3 ; CHECK-P8-NEXT: xvmulsp 1, 34, 0 -; CHECK-P8-NEXT: xvmaddasp 35, 1, 0 -; CHECK-P8-NEXT: xvmulsp 0, 1, 36 -; CHECK-P8-NEXT: xxlxor 1, 1, 1 -; CHECK-P8-NEXT: xvcmpeqsp 2, 34, 1 -; CHECK-P8-NEXT: xvmulsp 0, 0, 35 -; CHECK-P8-NEXT: xxsel 34, 0, 1, 2 +; CHECK-P8-NEXT: lvx 2, 0, 3 +; CHECK-P8-NEXT: addi 3, 4, .LCPI24_1@toc@l +; CHECK-P8-NEXT: lvx 3, 0, 3 +; CHECK-P8-NEXT: xvmaddasp 34, 1, 0 +; CHECK-P8-NEXT: xvmulsp 0, 1, 35 +; CHECK-P8-NEXT: xvmulsp 34, 0, 34 +; CHECK-P8-NEXT: blr +; CHECK-P8-NEXT: .LBB24_2: +; CHECK-P8-NEXT: xvsqrtsp 34, 34 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: hoo3_fmf: ; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: xvtsqrtsp 0, 34 +; CHECK-P9-NEXT: bc 12, 2, .LBB24_2 +; CHECK-P9-NEXT: # %bb.1: ; CHECK-P9-NEXT: xvrsqrtesp 0, 34 ; CHECK-P9-NEXT: addis 3, 2, .LCPI24_0@toc@ha ; CHECK-P9-NEXT: addi 3, 3, .LCPI24_0@toc@l @@ -981,10 +987,10 @@ ; CHECK-P9-NEXT: xvmaddasp 2, 1, 0 ; CHECK-P9-NEXT: lxvx 0, 0, 3 ; CHECK-P9-NEXT: xvmulsp 0, 1, 0 -; CHECK-P9-NEXT: xxlxor 1, 1, 1 -; CHECK-P9-NEXT: xvmulsp 0, 0, 2 -; CHECK-P9-NEXT: xvcmpeqsp 2, 34, 1 -; CHECK-P9-NEXT: xxsel 34, 0, 1, 2 +; CHECK-P9-NEXT: xvmulsp 34, 0, 2 +; CHECK-P9-NEXT: blr +; CHECK-P9-NEXT: .LBB24_2: +; CHECK-P9-NEXT: xvsqrtsp 34, 34 ; CHECK-P9-NEXT: blr %r = call reassoc ninf afn <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) ret <4 x float> %r @@ -1066,6 +1072,9 @@ ; ; CHECK-P8-LABEL: hoo4_fmf: ; CHECK-P8: # %bb.0: +; CHECK-P8-NEXT: xvtsqrtdp 0, 34 +; CHECK-P8-NEXT: bc 12, 2, .LBB26_2 +; CHECK-P8-NEXT: # %bb.1: ; CHECK-P8-NEXT: xvrsqrtedp 0, 34 ; CHECK-P8-NEXT: addis 3, 2, .LCPI26_0@toc@ha ; CHECK-P8-NEXT: addi 3, 3, .LCPI26_0@toc@l @@ -1083,14 +1092,17 @@ ; CHECK-P8-NEXT: xvmuldp 2, 34, 0 ; CHECK-P8-NEXT: xvmaddadp 1, 2, 0 ; CHECK-P8-NEXT: xvmuldp 0, 2, 3 -; CHECK-P8-NEXT: xxlxor 2, 2, 2 -; CHECK-P8-NEXT: xvcmpeqdp 34, 34, 2 -; CHECK-P8-NEXT: xvmuldp 0, 0, 1 -; CHECK-P8-NEXT: xxsel 34, 0, 2, 34 +; CHECK-P8-NEXT: xvmuldp 34, 0, 1 +; CHECK-P8-NEXT: blr +; CHECK-P8-NEXT: .LBB26_2: +; CHECK-P8-NEXT: xvsqrtdp 34, 34 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: hoo4_fmf: ; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: xvtsqrtdp 0, 34 +; CHECK-P9-NEXT: bc 12, 2, .LBB26_2 +; CHECK-P9-NEXT: # %bb.1: ; CHECK-P9-NEXT: xvrsqrtedp 0, 34 ; CHECK-P9-NEXT: addis 3, 2, .LCPI26_0@toc@ha ; CHECK-P9-NEXT: addi 3, 3, .LCPI26_0@toc@l @@ -1106,10 +1118,10 @@ ; CHECK-P9-NEXT: xvmuldp 3, 34, 0 ; CHECK-P9-NEXT: xvmaddadp 2, 3, 0 ; CHECK-P9-NEXT: xvmuldp 0, 3, 1 -; CHECK-P9-NEXT: xxlxor 1, 1, 1 -; CHECK-P9-NEXT: xvcmpeqdp 34, 34, 1 -; CHECK-P9-NEXT: xvmuldp 0, 0, 2 -; CHECK-P9-NEXT: xxsel 34, 0, 1, 34 +; CHECK-P9-NEXT: xvmuldp 34, 0, 2 +; CHECK-P9-NEXT: blr +; CHECK-P9-NEXT: .LBB26_2: +; CHECK-P9-NEXT: xvsqrtdp 34, 34 ; CHECK-P9-NEXT: blr %r = call reassoc ninf afn <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) ret <2 x double> %r