Index: lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- lib/Target/PowerPC/PPCInstrVSX.td
+++ lib/Target/PowerPC/PPCInstrVSX.td
@@ -989,21 +989,23 @@
 def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
 let Predicates = [HasP8Vector] in {
 let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
-let isCommutable = 1 in {
-  def XXLEQV : XX3Form<60, 186,
-                       (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                       "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
-                       [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
-  def XXLNAND : XX3Form<60, 178,
-                        (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-                        "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
-                        [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
+  let isCommutable = 1 in {
+    def XXLEQV : XX3Form<60, 186,
+                         (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                         "xxleqv $XT, $XA, $XB", IIC_VecGeneral,
+                         [(set v4i32:$XT, (vnot_ppc (xor v4i32:$XA, v4i32:$XB)))]>;
+    def XXLNAND : XX3Form<60, 178,
+                          (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+                          "xxlnand $XT, $XA, $XB", IIC_VecGeneral,
+                          [(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
                                                      v4i32:$XB)))]>;
 } // isCommutable
-def XXLORC : XX3Form<60, 170,
-             (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
-             "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
-             [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
+
+  def XXLORC : XX3Form<60, 170,
+               (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+               "xxlorc $XT, $XA, $XB", IIC_VecGeneral,
+               [(set v4i32:$XT, (or v4i32:$XA, (vnot_ppc v4i32:$XB)))]>;
+
 // VSX scalar loads introduced in ISA 2.07
 let mayLoad = 1 in {
   def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
@@ -1026,42 +1028,76 @@
                       "stxsiwx $XT, $dst", IIC_LdStSTFD,
                       [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
 } // mayStore
-def : Pat<(f64 (extloadf32 xoaddr:$src)),
-          (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
-def : Pat<(f64 (fextend f32:$src)),
-          (COPY_TO_REGCLASS $src, VSFRC)>;
-def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
-          (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
-          (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
-def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
-          (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
-          (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
-          (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
-def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+
+  def : Pat<(f64 (extloadf32 xoaddr:$src)),
+            (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
+  def : Pat<(f64 (fextend f32:$src)),
+            (COPY_TO_REGCLASS $src, VSFRC)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+            (SELECT_VSSRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+            (SELECT_VSSRC (CRORC $rhs, $lhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+            (SELECT_VSSRC (CREQV $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+            (SELECT_VSSRC (CRORC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+            (SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+  def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
             (SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+  // VSX Elementary Scalar FP arithmetic (SP)
+  let isCommutable = 1 in {
+    def XSADDSP : XX3Form<60, 0,
+                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                          "xsaddsp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fadd f32:$XA, f32:$XB))]>;
+    def XSMULSP : XX3Form<60, 16,
+                          (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                          "xsmulsp $XT, $XA, $XB", IIC_VecFP,
+                          [(set f32:$XT, (fmul f32:$XA, f32:$XB))]>;
+  } // isCommutable
+
+  def XSDIVSP : XX3Form<60, 24,
+                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                        "xsdivsp $XT, $XA, $XB", IIC_FPDivS,
+                        [(set f32:$XT, (fdiv f32:$XA, f32:$XB))]>;
+  def XSRESP : XX2Form<60, 26,
+                       (outs vssrc:$XT), (ins vssrc:$XB),
+                       "xsresp $XT, $XB", IIC_VecFP,
+                       [(set f32:$XT, (PPCfre f32:$XB))]>;
+  def XSSQRTSP : XX2Form<60, 11,
+                         (outs vssrc:$XT), (ins vssrc:$XB),
+                         "xssqrtsp $XT, $XB", IIC_FPSqrtS,
+                         [(set f32:$XT, (fsqrt f32:$XB))]>;
+  def XSRSQRTESP : XX2Form<60, 10,
+                           (outs vssrc:$XT), (ins vssrc:$XB),
+                           "xsrsqrtesp $XT, $XB", IIC_VecFP,
+                           [(set f32:$XT, (PPCfrsqrte f32:$XB))]>;
+  def XSSUBSP : XX3Form<60, 8,
+                        (outs vssrc:$XT), (ins vssrc:$XA, vssrc:$XB),
+                        "xssubsp $XT, $XA, $XB", IIC_VecFP,
+                        [(set f32:$XT, (fsub f32:$XA, f32:$XB))]>;
 } // AddedComplexity = 400
 } // HasP8Vector

 let Predicates = [HasDirectMove, HasVSX] in {
-// VSX direct move instructions
-def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
-             "mfvsrd $rA, $XT", IIC_VecGeneral,
-             [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
-             Requires<[In64BitMode]>;
-def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
-              "mfvsrwz $rA, $XT", IIC_VecGeneral,
-              [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
-def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
-             "mtvsrd $XT, $rA", IIC_VecGeneral,
-             [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
-             Requires<[In64BitMode]>;
-def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
-              "mtvsrwa $XT, $rA", IIC_VecGeneral,
-              [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
-def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
-              "mtvsrwz $XT, $rA", IIC_VecGeneral,
-              [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+  // VSX direct move instructions
+  def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
+               "mfvsrd $rA, $XT", IIC_VecGeneral,
+               [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+               Requires<[In64BitMode]>;
+  def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
+                "mfvsrwz $rA, $XT", IIC_VecGeneral,
+                [(set i32:$rA, (PPCmfvsr f64:$XT))]>;
+  def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
+               "mtvsrd $XT, $rA", IIC_VecGeneral,
+               [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+               Requires<[In64BitMode]>;
+  def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
+                "mtvsrwa $XT, $rA", IIC_VecGeneral,
+                [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+  def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
+                "mtvsrwz $XT, $rA", IIC_VecGeneral,
+                [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
 } // HasDirectMove, HasVSX
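A note for reviewing the tests that follow: xsresp and xsrsqrtesp produce hardware estimates rather than correctly rounded results, which is why the patterns above map them to the PPCfre/PPCfrsqrte estimate nodes instead of to fdiv/fsqrt directly. Those nodes are only formed under unsafe FP math, where the DAG combiner is expected to tighten the estimate with Newton-Raphson steps before use (vsx-recip-est.ll below checks that lowering, not the raw estimate). For reference, one refinement step for each quantity, written in LaTeX:

    e_{n+1} = e_n \, (2 - b \, e_n)                % refines e_n \approx 1/b
    e_{n+1} = \tfrac{e_n}{2} \, (3 - b \, e_n^{2}) % refines e_n \approx 1/\sqrt{b}

The number of steps and whether FMAs are used are backend policy; the formulas are only the standard iteration, not a claim about the exact sequence llc emits.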
Index: test/CodeGen/PowerPC/vsx-elementary-arith.ll
===================================================================
--- test/CodeGen/PowerPC/vsx-elementary-arith.ll
+++ test/CodeGen/PowerPC/vsx-elementary-arith.ll
@@ -0,0 +1,120 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s
+@a = global float 3.000000e+00, align 4
+@b = global float 4.000000e+00, align 4
+@c = global double 3.000000e+00, align 8
+@d = global double 4.000000e+00, align 8
+
+; Function Attrs: nounwind
+define float @emit_xsaddsp() {
+entry:
+  %0 = load float, float* @a, align 4
+  %1 = load float, float* @b, align 4
+  %add = fadd float %0, %1
+  ret float %add
+; CHECK-LABEL: @emit_xsaddsp
+; CHECK: xsaddsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xssubsp() {
+entry:
+  %0 = load float, float* @a, align 4
+  %1 = load float, float* @b, align 4
+  %sub = fsub float %0, %1
+  ret float %sub
+; CHECK-LABEL: @emit_xssubsp
+; CHECK: xssubsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xsdivsp() {
+entry:
+  %0 = load float, float* @a, align 4
+  %1 = load float, float* @b, align 4
+  %div = fdiv float %0, %1
+  ret float %div
+; CHECK-LABEL: @emit_xsdivsp
+; CHECK: xsdivsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xsmulsp() {
+entry:
+  %0 = load float, float* @a, align 4
+  %1 = load float, float* @b, align 4
+  %mul = fmul float %0, %1
+  ret float %mul
+; CHECK-LABEL: @emit_xsmulsp
+; CHECK: xsmulsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xssqrtsp() {
+entry:
+  %0 = load float, float* @b, align 4
+  %call = call float @sqrtf(float %0)
+  ret float %call
+; CHECK-LABEL: @emit_xssqrtsp
+; CHECK: xssqrtsp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+declare float @sqrtf(float)
+
+; Function Attrs: nounwind
+define double @emit_xsadddp() {
+entry:
+  %0 = load double, double* @c, align 8
+  %1 = load double, double* @d, align 8
+  %add = fadd double %0, %1
+  ret double %add
+; CHECK-LABEL: @emit_xsadddp
+; CHECK: xsadddp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xssubdp() {
+entry:
+  %0 = load double, double* @c, align 8
+  %1 = load double, double* @d, align 8
+  %sub = fsub double %0, %1
+  ret double %sub
+; CHECK-LABEL: @emit_xssubdp
+; CHECK: xssubdp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xsdivdp() {
+entry:
+  %0 = load double, double* @c, align 8
+  %1 = load double, double* @d, align 8
+  %div = fdiv double %0, %1
+  ret double %div
+; CHECK-LABEL: @emit_xsdivdp
+; CHECK: xsdivdp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xsmuldp() {
+entry:
+  %0 = load double, double* @c, align 8
+  %1 = load double, double* @d, align 8
+  %mul = fmul double %0, %1
+  ret double %mul
+; CHECK-LABEL: @emit_xsmuldp
+; CHECK: xsmuldp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xssqrtdp() {
+entry:
+  %0 = load double, double* @d, align 8
+  %call = call double @sqrt(double %0)
+  ret double %call
+; CHECK-LABEL: @emit_xssqrtdp
+; CHECK: xssqrtdp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+declare double @sqrt(double)
Index: test/CodeGen/PowerPC/vsx-recip-est.ll
===================================================================
--- test/CodeGen/PowerPC/vsx-recip-est.ll
+++ test/CodeGen/PowerPC/vsx-recip-est.ll
@@ -0,0 +1,62 @@
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -enable-unsafe-fp-math | FileCheck %s
+@a = global float 3.000000e+00, align 4
+@b = global float 4.000000e+00, align 4
+@c = global double 3.000000e+00, align 8
+@d = global double 4.000000e+00, align 8
+
+; Function Attrs: nounwind
+define float @emit_xsresp() {
+entry:
+  %0 = load float, float* @a, align 4
+  %1 = load float, float* @b, align 4
+  %div = fdiv fast float %0, %1
+  ret float %div
+; CHECK-LABEL: @emit_xsresp
+; CHECK: xsresp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define float @emit_xsrsqrtesp(float %f) {
+entry:
+  %f.addr = alloca float, align 4
+  store float %f, float* %f.addr, align 4
+  %0 = load float, float* %f.addr, align 4
+  %1 = load float, float* @b, align 4
+  %2 = call float @llvm.sqrt.f32(float %1)
+  %div = fdiv fast float %0, %2
+  ret float %div
+; CHECK-LABEL: @emit_xsrsqrtesp
+; CHECK: xsrsqrtesp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.sqrt.f32(float)
+
+; Function Attrs: nounwind
+define double @emit_xsredp() {
+entry:
+  %0 = load double, double* @c, align 8
+  %1 = load double, double* @d, align 8
+  %div = fdiv fast double %0, %1
+  ret double %div
+; CHECK-LABEL: @emit_xsredp
+; CHECK: xsredp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind
+define double @emit_xsrsqrtedp(double %f) {
+entry:
+  %f.addr = alloca double, align 8
+  store double %f, double* %f.addr, align 8
+  %0 = load double, double* %f.addr, align 8
+  %1 = load double, double* @d, align 8
+  %2 = call double @llvm.sqrt.f64(double %1)
+  %div = fdiv fast double %0, %2
+  ret double %div
+; CHECK-LABEL: @emit_xsrsqrtedp
+; CHECK: xsrsqrtedp {{[0-9]+}}
+}
+
+; Function Attrs: nounwind readnone
+declare double @llvm.sqrt.f64(double)
Index: test/MC/Disassembler/PowerPC/vsx.txt
===================================================================
--- test/MC/Disassembler/PowerPC/vsx.txt
+++ test/MC/Disassembler/PowerPC/vsx.txt
@@ -39,6 +39,9 @@
 # CHECK: xsabsdp 7, 27
 0xf0 0xe0 0xdd 0x64

+# CHECK: xsaddsp 7, 63, 27
+0xf0 0xff 0xd8 0x04
+
 # CHECK: xsadddp 7, 63, 27
 0xf0 0xff 0xd9 0x04

@@ -75,6 +78,9 @@
 # CHECK: xscvuxddp 7, 27
 0xf0 0xe0 0xdd 0xa0

+# CHECK: xsdivsp 7, 63, 27
+0xf0 0xff 0xd8 0xc4
+
 # CHECK: xsdivdp 7, 63, 27
 0xf0 0xff 0xd9 0xc4

@@ -96,6 +102,9 @@
 # CHECK: xsmsubmdp 7, 63, 27
 0xf0 0xff 0xd9 0xcc

+# CHECK: xsmulsp 7, 63, 27
+0xf0 0xff 0xd8 0x84
+
 # CHECK: xsmuldp 7, 63, 27
 0xf0 0xff 0xd9 0x84

@@ -132,15 +141,27 @@
 # CHECK: xsrdpiz 7, 27
 0xf0 0xe0 0xd9 0x64

+# CHECK: xsresp 7, 27
+0xf0 0xe0 0xd8 0x68
+
 # CHECK: xsredp 7, 27
 0xf0 0xe0 0xd9 0x68

+# CHECK: xsrsqrtesp 7, 27
+0xf0 0xe0 0xd8 0x28
+
 # CHECK: xsrsqrtedp 7, 27
 0xf0 0xe0 0xd9 0x28

+# CHECK: xssqrtsp 7, 27
+0xf0 0xe0 0xd8 0x2c
+
 # CHECK: xssqrtdp 7, 27
 0xf0 0xe0 0xd9 0x2c

+# CHECK: xssubsp 7, 63, 27
+0xf0 0xff 0xd8 0x44
+
 # CHECK: xssubdp 7, 63, 27
 0xf0 0xff 0xd9 0x44
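One observation that makes the new encodings above easy to review: every single-precision word differs from its double-precision counterpart only in instruction bit 23 (0xd8 versus 0xd9 in the third byte), the extended-opcode bit that separates the SP and DP variants. The following standalone Python sketch, which is an illustration and not part of the patch, decodes the four new XX3-form words per the ISA 2.07 XX3 field layout and recovers the XO values used in the XX3Form definitions:

    # Decode the big-endian words from the new disassembler checks above.
    WORDS = {"xsaddsp": 0xF0FFD804, "xssubsp": 0xF0FFD844,
             "xsmulsp": 0xF0FFD884, "xsdivsp": 0xF0FFD8C4}

    def decode_xx3(word):
        # IBM bit numbering: bit 0 is the MSB, so instruction bits i..j
        # live at Python bit positions (31 - j) .. (31 - i).
        opcode = word >> 26                                  # bits 0-5
        t, a, b = (word >> 21) & 31, (word >> 16) & 31, (word >> 11) & 31
        xo = (word >> 3) & 0xFF                              # bits 21-28
        ax, bx, tx = (word >> 2) & 1, (word >> 1) & 1, word & 1
        return opcode, (tx << 5) | t, (ax << 5) | a, (bx << 5) | b, xo

    for name, word in WORDS.items():
        opcode, xt, xa, xb, xo = decode_xx3(word)
        assert (opcode, xt, xa, xb) == (60, 7, 63, 27), name
        print(name, "XO =", xo)  # prints 0, 8, 16, 24, matching the .td defs

(The XX2-form words, xsresp and friends, carry a 9-bit XO in bits 21-29 instead; the same bit-23 relationship between the SP and DP variants holds there as well.)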
Index: test/MC/PowerPC/vsx.s
===================================================================
--- test/MC/PowerPC/vsx.s
+++ test/MC/PowerPC/vsx.s
@@ -44,6 +44,9 @@
 # CHECK-BE: xsabsdp 7, 27                   # encoding: [0xf0,0xe0,0xdd,0x64]
 # CHECK-LE: xsabsdp 7, 27                   # encoding: [0x64,0xdd,0xe0,0xf0]
             xsabsdp 7, 27
+# CHECK-BE: xsaddsp 7, 63, 27               # encoding: [0xf0,0xff,0xd8,0x04]
+# CHECK-LE: xsaddsp 7, 63, 27               # encoding: [0x04,0xd8,0xff,0xf0]
+            xsaddsp 7, 63, 27
 # CHECK-BE: xsadddp 7, 63, 27               # encoding: [0xf0,0xff,0xd9,0x04]
 # CHECK-LE: xsadddp 7, 63, 27               # encoding: [0x04,0xd9,0xff,0xf0]
             xsadddp 7, 63, 27
@@ -80,6 +83,9 @@
 # CHECK-BE: xscvuxddp 7, 27                 # encoding: [0xf0,0xe0,0xdd,0xa0]
 # CHECK-LE: xscvuxddp 7, 27                 # encoding: [0xa0,0xdd,0xe0,0xf0]
             xscvuxddp 7, 27
+# CHECK-BE: xsdivsp 7, 63, 27               # encoding: [0xf0,0xff,0xd8,0xc4]
+# CHECK-LE: xsdivsp 7, 63, 27               # encoding: [0xc4,0xd8,0xff,0xf0]
+            xsdivsp 7, 63, 27
 # CHECK-BE: xsdivdp 7, 63, 27               # encoding: [0xf0,0xff,0xd9,0xc4]
 # CHECK-LE: xsdivdp 7, 63, 27               # encoding: [0xc4,0xd9,0xff,0xf0]
             xsdivdp 7, 63, 27
@@ -101,6 +107,9 @@
 # CHECK-BE: xsmsubmdp 7, 63, 27             # encoding: [0xf0,0xff,0xd9,0xcc]
 # CHECK-LE: xsmsubmdp 7, 63, 27             # encoding: [0xcc,0xd9,0xff,0xf0]
             xsmsubmdp 7, 63, 27
+# CHECK-BE: xsmulsp 7, 63, 27               # encoding: [0xf0,0xff,0xd8,0x84]
+# CHECK-LE: xsmulsp 7, 63, 27               # encoding: [0x84,0xd8,0xff,0xf0]
+            xsmulsp 7, 63, 27
 # CHECK-BE: xsmuldp 7, 63, 27               # encoding: [0xf0,0xff,0xd9,0x84]
 # CHECK-LE: xsmuldp 7, 63, 27               # encoding: [0x84,0xd9,0xff,0xf0]
             xsmuldp 7, 63, 27
@@ -137,15 +146,27 @@
 # CHECK-BE: xsrdpiz 7, 27                   # encoding: [0xf0,0xe0,0xd9,0x64]
 # CHECK-LE: xsrdpiz 7, 27                   # encoding: [0x64,0xd9,0xe0,0xf0]
             xsrdpiz 7, 27
+# CHECK-BE: xsresp 7, 27                    # encoding: [0xf0,0xe0,0xd8,0x68]
+# CHECK-LE: xsresp 7, 27                    # encoding: [0x68,0xd8,0xe0,0xf0]
+            xsresp 7, 27
 # CHECK-BE: xsredp 7, 27                    # encoding: [0xf0,0xe0,0xd9,0x68]
 # CHECK-LE: xsredp 7, 27                    # encoding: [0x68,0xd9,0xe0,0xf0]
             xsredp 7, 27
+# CHECK-BE: xsrsqrtesp 7, 27                # encoding: [0xf0,0xe0,0xd8,0x28]
+# CHECK-LE: xsrsqrtesp 7, 27                # encoding: [0x28,0xd8,0xe0,0xf0]
+            xsrsqrtesp 7, 27
 # CHECK-BE: xsrsqrtedp 7, 27                # encoding: [0xf0,0xe0,0xd9,0x28]
 # CHECK-LE: xsrsqrtedp 7, 27                # encoding: [0x28,0xd9,0xe0,0xf0]
             xsrsqrtedp 7, 27
+# CHECK-BE: xssqrtsp 7, 27                  # encoding: [0xf0,0xe0,0xd8,0x2c]
+# CHECK-LE: xssqrtsp 7, 27                  # encoding: [0x2c,0xd8,0xe0,0xf0]
+            xssqrtsp 7, 27
 # CHECK-BE: xssqrtdp 7, 27                  # encoding: [0xf0,0xe0,0xd9,0x2c]
 # CHECK-LE: xssqrtdp 7, 27                  # encoding: [0x2c,0xd9,0xe0,0xf0]
             xssqrtdp 7, 27
+# CHECK-BE: xssubsp 7, 63, 27               # encoding: [0xf0,0xff,0xd8,0x44]
+# CHECK-LE: xssubsp 7, 63, 27               # encoding: [0x44,0xd8,0xff,0xf0]
+            xssubsp 7, 63, 27
 # CHECK-BE: xssubdp 7, 63, 27               # encoding: [0xf0,0xff,0xd9,0x44]
 # CHECK-LE: xssubdp 7, 63, 27               # encoding: [0x44,0xd9,0xff,0xf0]
             xssubdp 7, 63, 27