Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -635,9 +635,9 @@ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } - if (Subtarget.hasP8Altivec()) + if (Subtarget.hasP8Altivec()) setOperationAction(ISD::MUL, MVT::v4i32, Legal); - else + else setOperationAction(ISD::MUL, MVT::v4i32, Custom); setOperationAction(ISD::MUL, MVT::v8i16, Custom); @@ -8335,7 +8335,7 @@ SDValue Src = V1.getOperand(0); if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && Src.getOperand(0).getOpcode() == ISD::LOAD && - Src.getOperand(0).hasOneUse()) + Src.getOperand(0).hasOneUse()) return V1; } SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); @@ -8990,7 +8990,7 @@ } SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, - SelectionDAG &DAG) const { + SelectionDAG &DAG) const { SDLoc dl(Op); // Create a stack slot that is 16-byte aligned. MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1463,8 +1463,6 @@ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), - (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>; // Instructions for converting float to i64 feeding a store. 
let Predicates = [NoP9Vector] in { @@ -2984,13 +2982,41 @@ (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector - (f32 (fpround (f64 (extloadf32 xoaddr:$src)))))), - (v4f32 (LXVWSX xoaddr:$src))>; + + // Improvement to 32bit / 64bit vector loads (needs P8Vector: LIWAX/LIWZX) + let AddedComplexity = 400 in { + // LIWAX - used for sign extending i32 -> i64 + // LIWZX - emitted for either i32 or float + let Predicates = [HasP8Vector, IsLittleEndian] in { + + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXPERMDIs (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + } + + let Predicates = [HasP8Vector, IsBigEndian] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + } + + } // Build vectors from i8 loads def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), @@ -3152,6 +3178,35 @@ def : Pat<(f32 (fpround (f64 (extloadf32 
ixaddr:$src)))), (f32 (DFLOADf32 ixaddr:$src))>; + + // Improvement to 64 bit vector loads + let AddedComplexity = 400 in { + // Using pseudoinstructions to ensure utilization of 64 bit registers + let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (XXPERMDIs (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (XXPERMDIs (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + } + } + let Predicates = [IsBigEndian, HasP9Vector] in { // (Un)Signed DWord vector extract -> QP @@ -3687,3 +3742,4 @@ (v4i32 (VEXTSH2W $A))>; } } + Index: llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll =================================================================== --- llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll +++ llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -8,19 +8,20 @@ define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) { ; CHECK-P8-LABEL: testExpandPostRAPseudo: -; CHECK-P8: lxsiwax 34, 0, 3 -; CHECK-P8-NEXT: xxspltw 34, 34, 1 -; CHECK-P8-NEXT: stvx 2, 0, 4 +; CHECK-P8: lfiwzx 0, 0, 3 
+; CHECK-P8: xxpermdi 0, 0, 0, 2 +; CHECK-P8: stvx 2, 0, 4 +; CHECK-P8: lis 4, 1024 ; CHECK-P8: #APP ; CHECK-P8-NEXT: #Clobber Rigisters ; CHECK-P8-NEXT: #NO_APP -; CHECK-P8-NEXT: lis 4, 1024 ; CHECK-P8-NEXT: lfiwax 0, 0, 3 ; CHECK-P8: stfsx 0, 3, 4 ; CHECK-P8-NEXT: blr ; CHECK-P9-LABEL: testExpandPostRAPseudo: -; CHECK-P9: lxvwsx 0, 0, 3 +; CHECK-P9: lfiwzx 0, 0, 3 +; CHECK-P9: xxpermdi 0, 0, 0, 2 ; CHECK-P9: stxvx 0, 0, 4 ; CHECK-P9: #APP ; CHECK-P9-NEXT: #Clobber Rigisters Index: llvm/test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -109,8 +109,8 @@ ;vector int spltRegVali(int val) { // ; return (vector int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, (BE): lfiwzx // ;vector int spltMemVali(int *ptr) { // ; return (vector int)*ptr; // ;} // @@ -286,8 +286,8 @@ ;vector unsigned int spltRegValui(unsigned int val) { // ; return (vector unsigned int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi (BE): lfiwzx // +;// P9: (LE) lfiwzx, xxpermdi (BE): lfiwzx // ;vector unsigned int spltMemValui(unsigned int *ptr) { // ; return (vector unsigned int)*ptr; // ;} // @@ -1205,15 +1205,17 @@ ; P9LE-LABEL: spltMemVali ; P8BE-LABEL: spltMemVali ; P8LE-LABEL: spltMemVali -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx +; P9LE: xxpermdi ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx +; P8BE: xxspltw ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: xxspltw ; P8LE: blr } @@ -2365,15 +2367,17 @@ ; P9LE-LABEL: spltMemValui ; P8BE-LABEL: spltMemValui ; P8LE-LABEL: spltMemValui 
-; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx +; P9LE: xxpermdi ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx +; P8BE: xxspltw ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: xxspltw ; P8LE: blr } Index: llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll =================================================================== --- llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -1,5 +1,5 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ -; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s +; RUN: --check-prefix=CHECK-LE -implicit-check-not vmrg -implicit-check-not=vperm %s ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s @@ -10,6 +10,11 @@ %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> ret <16 x i8> %2 ; CHECK-LABEL: test -; CHECK: lxsiwax 34, 0, 3 -; CHECK: xxspltw 34, 34, 1 +; CHECK: lfiwzx 0, 0, 3 +; CHECK: xxspltw 34, 0, 0 + +; CHECK-LE-LABEL: test +; CHECK-LE: lfiwzx 0, 0, 3 +; CHECK-LE: xxpermdi 0, 0, 0, 2 +; CHECK-LE: xxspltw 34, 0, 3 } Index: llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll =================================================================== --- llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll +++ llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll @@ -37,10 +37,11 @@ define <4 x i32> @test4(i32* nocapture readonly %in) { entry: ; CHECK-LABEL: test4 -; CHECK: lxvwsx 34, 0, 3 +; CHECK: lfiwzx 0, 0, 3 +; CHECK: xxpermdi ; CHECK-NOT: xxspltw ; CHECK-BE-LABEL: test4 -; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE: lfiwzx 0, 0, 3 ; CHECK-BE-NOT: xxspltw %0 = load i32, i32* %in, align 4 %splat.splatinsert = insertelement <4 x 
i32> undef, i32 %0, i32 0 @@ -51,10 +52,11 @@ define <4 x float> @test5(float* nocapture readonly %in) { entry: ; CHECK-LABEL: test5 -; CHECK: lxvwsx 34, 0, 3 +; CHECK: lfiwzx 0, 0, 3 +; CHECK: xxpermdi ; CHECK-NOT: xxspltw ; CHECK-BE-LABEL: test5 -; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE: lfiwzx 0, 0, 3 ; CHECK-BE-NOT: xxspltw %0 = load float, float* %in, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 @@ -67,12 +69,13 @@ ; CHECK-LABEL: test6 ; CHECK: addis ; CHECK: ld [[TOC:[0-9]+]], .LC0 -; CHECK: lxvwsx 34, 0, 3 +; CHECK: lfiwzx 0, 0, 3 +; CHECK: xxpermdi ; CHECK-NOT: xxspltw ; CHECK-BE-LABEL: test6 ; CHECK-BE: addis ; CHECK-BE: ld [[TOC:[0-9]+]], .LC0 -; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE: lfiwzx 0, 0, 3 ; CHECK-BE-NOT: xxspltw %0 = load i32, i32* @Globi, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 @@ -85,12 +88,13 @@ ; CHECK-LABEL: test7 ; CHECK: addis ; CHECK: ld [[TOC:[0-9]+]], .LC1 -; CHECK: lxvwsx 34, 0, 3 +; CHECK: lfiwzx 0, 0, 3 +; CHECK: xxpermdi ; CHECK-NOT: xxspltw ; CHECK-BE-LABEL: test7 ; CHECK-BE: addis ; CHECK-BE: ld [[TOC:[0-9]+]], .LC1 -; CHECK-BE: lxvwsx 34, 0, 3 +; CHECK-BE: lfiwzx 0, 0, 3 ; CHECK-BE-NOT: xxspltw %0 = load float, float* @Globf, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 Index: llvm/test/CodeGen/PowerPC/qpx-load-splat.ll =================================================================== --- llvm/test/CodeGen/PowerPC/qpx-load-splat.ll +++ llvm/test/CodeGen/PowerPC/qpx-load-splat.ll @@ -1,6 +1,4 @@ ; RUN: llc -verify-machineinstrs < %s | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-bgq-linux" ; Function Attrs: norecurse nounwind readonly define <4 x double> @foo(double* nocapture readonly %a) #0 { @@ -11,7 +9,7 @@ ret <4 x double> %shuffle.i ; CHECK-LABEL: @foo -; CHECK: lfd 1, 0(3) +; CHECK: lxvdsx ; CHECK: blr } @@ -25,7 +23,7 @@ ; CHECK-LABEL: @foox ; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; 
CHECK: lfdx 1, 3, [[REG1]] +; CHECK: lxvdsx ; CHECK: blr } @@ -40,7 +38,7 @@ ; CHECK-LABEL: @foox ; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdux 1, 3, [[REG1]] +; CHECK: lfdux 0, 3, [[REG1]] ; CHECK: std 3, 0(5) ; CHECK: blr } @@ -53,7 +51,8 @@ ret <4 x float> %shuffle.i ; CHECK-LABEL: @foof -; CHECK: lfs 1, 0(3) +; CHECK: lfiwzx +; CHECK: xxpermdi ; CHECK: blr } @@ -67,9 +66,9 @@ ; CHECK-LABEL: @foofx ; CHECK: sldi [[REG1:[0-9]+]], 4, 2 -; CHECK: lfsx 1, 3, [[REG1]] +; CHECK: lfiwzx +; CHECK: xxpermdi ; CHECK: blr } -attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" } Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll @@ -0,0 +1,252 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test1 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr +entry: + %0 = load i64, i64* %int64, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: 
s2v_test2 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test2 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3 +; P9LE: lfdx +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test3 +; P9BE: lfdx +; P9BE: xxpermdi +; P9BE: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test4 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test5 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr +entry: + %0 = load i64, i64* %ptr1, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f1 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: 
xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test_f1 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr + +; P8LE-LABEL: s2v_test_f1 +; P8LE: lxsdx +; P8LE: xxspltd +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f1 +; P8BE: lxsdx +; P8BE: xxpermdi +; P8BE: blr +entry: + %0 = load double, double* %f64, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f2 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test_f2 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr + +; P8LE-LABEL: s2v_test_f2 +; P8LE: lxsdx +; P8LE: xxspltd +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f2 +; P8BE: lxsdx +; P8BE: xxpermdi +; P8BE: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3 +; P9LE: lfdx +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test_f3 +; P9BE: lfdx +; P9BE: xxpermdi +; P9BE: blr + +; P8LE-LABEL: s2v_test_f3 +; P8LE: lxsdx +; P8LE: xxspltd +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f3 +; P8BE: lxsdx +; P8BE: xxpermdi +; P8BE: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f4 +; 
P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test_f4 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr + +; P8LE-LABEL: s2v_test_f4 +; P8LE: lxsdx +; P8LE: xxspltd +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f4 +; P8BE: lxsdx +; P8BE: xxpermdi +; P8BE: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5 +; P9LE: lfd +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test_f5 +; P9BE: lfd +; P9BE: xxpermdi +; P9BE: blr + +; P8LE-LABEL: s2v_test_f5 +; P8LE: lxsdx +; P8LE: xxspltd +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f5 +; P8BE: lxsdx +; P8BE: xxpermdi +; P8BE: blr +entry: + %0 = load double, double* %ptr1, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -0,0 +1,94 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P8BE + +define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9BE-LABEL: test_liwzx1 +; P9BE: lfiwzx 
+; P9BE-NEXT: lfiwzx +; P9BE: xvaddsp +; P9BE: xscvspdpn +; P9BE: stfs +; P9BE: blr + +; P8BE-LABEL: test_liwzx1 +; P8BE: lfiwzx +; P8BE-NEXT: lfiwzx +; P8BE: xvaddsp +; P8BE: xscvspdpn +; P8BE: stfsx +; P8BE: blr + +; P9LE-LABEL: test_liwzx1 +; P9LE: lfiwzx +; P9LE-NEXT: lfiwzx +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: xvaddsp +; P9LE: xscvspdpn +; P9LE: stfs +; P9LE: blr + +; P8LE-LABEL: test_liwzx1 +; P8LE: lfiwzx +; P8LE-NEXT: lfiwzx +; P8LE: xxpermdi +; P8LE-NEXT: xxpermdi +; P8LE: xvaddsp +; P8LE: xscvspdpn +; P8LE: stfsx +; P8LE: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fadd <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret void +} + +define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9BE-LABEL: test_liwzx2 +; P9BE: lfiwzx +; P9BE-NEXT: lfiwzx +; P9BE: xvsubsp +; P9BE: xscvspdpn +; P9BE: stfs +; P9BE: blr + +; P8BE-LABEL: test_liwzx2 +; P8BE: lfiwzx +; P8BE-NEXT: lfiwzx +; P8BE: xvsubsp +; P8BE: xscvspdpn +; P8BE: stfsx +; P8BE: blr + +; P9LE-LABEL: test_liwzx2 +; P9LE: lfiwzx +; P9LE-NEXT: lfiwzx +; P9LE: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: xvsubsp +; P9LE: xscvspdpn +; P9LE: stfs +; P9LE: blr + +; P8LE-LABEL: test_liwzx2 +; P8LE: lfiwzx +; P8LE-NEXT: lfiwzx +; P8LE: xxpermdi +; P8LE-NEXT: xxpermdi +; P8LE: xvsubsp +; P8LE: xscvspdpn +; P8LE: stfsx +; P8LE: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fsub <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret <1 x float>* %C +} Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll @@ -0,0 +1,144 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs 
-mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P9BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test1 +; P9BE: lfiwax +; P9BE: xxpermdi +; P9BE: blr +entry: + %0 = load i32, i32* %int32, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test2 +; P9BE: lfiwax +; P9BE: xxpermdi +; P9BE: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test3 +; P9BE: lfiwax +; P9BE: xxpermdi +; P9BE: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test4 +; P9BE: lfiwax +; P9BE: xxpermdi +; P9BE: blr +entry: + %arrayidx = 
getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE-NEXT: xxpermdi +; P9LE: blr + +; P9BE-LABEL: s2v_test5 +; P9BE: lfiwax +; P9BE: xxpermdi +; P9BE: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test6 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE: xxspltd +; P9LE: blr + +; P9BE-LABEL: s2v_test6 +; P9BE: lfiwax +; P9BE: xxspltd +; P9BE: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test7 +; P9LE: lfiwax +; P9LE-NEXT: xxpermdi +; P9LE: xxspltd +; P9LE: blr + +; P9BE-LABEL: s2v_test7 +; P9BE: lfiwax +; P9BE: xxspltd +; P9BE: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -0,0 +1,173 @@ +; RUN: 
llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test1 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test1 +; P8BE: lfiwzx +; P8BE: blr +entry: + %0 = load i32, i32* %int32, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test2 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test2 +; P8BE: lfiwzx +; P8BE: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) { +; P8LE-LABEL: s2v_test3 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test3 +; P8BE: lfiwzx +; P8BE: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test4 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test4 +; P8BE: lfiwzx +; P8BE: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x 
i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) { +; P8LE-LABEL: s2v_test5 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test5 +; P8BE: lfiwzx +; P8BE: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) { +; P8LE-LABEL: s2v_test_f1 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f1 +; P8BE: lfiwzx +; P8BE: blr +entry: + %0 = load float, float* %f64, align 4 + %vecins = insertelement <4 x float> %vec, float %0, i32 0 + ret <4 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) { +; P8LE-LABEL: s2v_test_f2 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f2 +; P8BE: lfiwzx +; P8BE: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) { +; P8LE-LABEL: s2v_test_f3 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f3 +; P8BE: lfiwzx +; P8BE: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) { +; P8LE-LABEL: s2v_test_f4 
+; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f4 +; P8BE: lfiwzx +; P8BE: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) { +; P8LE-LABEL: s2v_test_f5 +; P8LE: lfiwzx +; P8LE: xxpermdi +; P8LE: blr + +; P8BE-LABEL: s2v_test_f5 +; P8BE: lfiwzx +; P8BE: blr +entry: + %0 = load float, float* %ptr1, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + Index: llvm/test/CodeGen/PowerPC/swaps-le-6.ll =================================================================== --- llvm/test/CodeGen/PowerPC/swaps-le-6.ll +++ llvm/test/CodeGen/PowerPC/swaps-le-6.ll @@ -35,9 +35,9 @@ ; CHECK-P9-LABEL: @bar0 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] ; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1 -; CHECK-P9: stxvx [[REG5]] +; CHECK-P9: xxpermdi 0, 0, 0, 2 +; CHECK-P9: xxpermdi [[REG2]], [[REG1]], [[REG2]], 1 +; CHECK-P9: stxvx 0, 0, 3 define void @bar1() { entry: @@ -58,7 +58,7 @@ ; CHECK-P9-LABEL: @bar1 ; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] ; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]] -; CHECK-P9: stxvx [[REG5]] +; CHECK-P9: xxpermdi 0, 0, 0, 2 +; CHECK-P9: xxmrgld [[REG2]], [[REG2]], [[REG1]] +; CHECK-P9: stxvx 0, 0, 3 Index: llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -24,8 +24,8 @@ ; CHECK-P9-LABEL: testi0 ; CHECK-P9: lfd 
[[REG1:[0-9]+]], 0(4) ; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1 +; CHECK-P9: xxpermdi 0, 0, 0, 2 +; CHECK-P9: xxpermdi 34, [[REG2]], [[REG1]], 1 } define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { @@ -44,8 +44,8 @@ ; CHECK-P9-LABEL: testi1 ; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) ; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxmrgld 34, [[REG3]], [[REG2]] +; CHECK-P9: xxpermdi 0, 0, 0, 2 +; CHECK-P9: xxmrgld 34, [[REG1]], [[REG2]] } define double @teste0(<2 x double>* %p1) {