Index: llvm/lib/Target/PowerPC/P9InstrResources.td =================================================================== --- llvm/lib/Target/PowerPC/P9InstrResources.td +++ llvm/lib/Target/PowerPC/P9InstrResources.td @@ -591,6 +591,7 @@ XXPERM, XXPERMR, XXSLDWI, + XXSLDWIs, XXSPLTIB, XXSPLTW, XXSPLTWs, Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8327,17 +8327,6 @@ if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); - // If the source for the shuffle is a scalar_to_vector that came from a - // 32-bit load, it will have used LXVWSX so we don't need to splat again. - if (Subtarget.hasP9Vector() && - ((isLittleEndian && SplatIdx == 3) || - (!isLittleEndian && SplatIdx == 0))) { - SDValue Src = V1.getOperand(0); - if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR && - Src.getOperand(0).getOpcode() == ISD::LOAD && - Src.getOperand(0).hasOneUse()) - return V1; - } SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv, DAG.getConstant(SplatIdx, dl, MVT::i32)); Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -877,6 +877,12 @@ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, [(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB, imm32SExt16:$SHW))]>; + + let isCodeGenOnly = 1 in + def XXSLDWIs : XX3Form_2s<60, 2, + (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW), + "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>; + def XXSPLTW : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM), "xxspltw $XT, $XB, $UIM", IIC_VecPerm, @@ -886,6 +892,7 @@ def XXSPLTWs : XX2Form_2<60, 164, (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM), "xxspltw $XT, 
$XB, $UIM", IIC_VecPerm, []>; + } // hasSideEffects } // UseVSXReg = 1 @@ -1463,8 +1470,6 @@ (f64 (PPCmtvsra (i64 (vector_extract v2i64:$S, 1)))))), (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } - def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), - (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>; // Instructions for converting float to i64 feeding a store. let Predicates = [NoP9Vector] in { @@ -2984,13 +2989,47 @@ (STXVX $rS, xoaddr:$dst)>; def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), - (v4i32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), - (v4f32 (LXVWSX xoaddr:$src))>; - def : Pat<(v4f32 (scalar_to_vector - (f32 (fpround (f64 (extloadf32 xoaddr:$src)))))), - (v4f32 (LXVWSX xoaddr:$src))>; + + let AddedComplexity = 400 in { + // LIWAX - This instruction is used for sign extending i32 -> i64. + // LIWZX - This instruction will be emitted for i32, f32, and when + // zero-extending i32 to i64 (zext i32 -> i64). 
+ let Predicates = [IsLittleEndian, HasP8Vector] in { + + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXPERMDIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian, HasP8Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (sextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWAX xoaddr:$src), VSRC))>; + + def : Pat<(v2i64 (scalar_to_vector (i64 (zextloadi32 xoaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC))>; + + def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), + (v4i32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + + def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), + (v4f32 (XXSLDWIs + (COPY_TO_REGCLASS (LIWZX xoaddr:$src), VSRC), 1))>; + } + + } // Build vectors from i8 loads def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)), @@ -3152,6 +3191,39 @@ def : Pat<(f32 (fpround (f64 (extloadf32 ixaddr:$src)))), (f32 (DFLOADf32 ixaddr:$src))>; + + let AddedComplexity = 400 in { + // The following pseudoinstructions are used to ensure the utilization + // of all 64 VSX registers. 
+ let Predicates = [IsLittleEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC), 2))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (XXPERMDIs + (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC), 2))>; + } + + let Predicates = [IsBigEndian, HasP9Vector] in { + def : Pat<(v2i64 (scalar_to_vector (i64 (load ixaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddr:$src)))), + (v2i64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + + def : Pat<(v2f64 (scalar_to_vector (f64 (load ixaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (DFLOADf64 ixaddr:$src), VSRC))>; + def : Pat<(v2f64 (scalar_to_vector (f64 (load xaddr:$src)))), + (v2f64 (COPY_TO_REGCLASS (XFLOADf64 xaddr:$src), VSRC))>; + } + } + let Predicates = [IsBigEndian, HasP9Vector] in { // (Un)Signed DWord vector extract -> QP @@ -3687,3 +3759,4 @@ (v4i32 (VEXTSH2W $A))>; } } + Index: llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll =================================================================== --- llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll +++ llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -1,35 +1,123 @@ -; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P8 -; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ -; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9 +; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-P8 +; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs | FileCheck %s --check-prefix=CHECK-P9 @a = external local_unnamed_addr global <4 x i32>, align 16 @pb = external local_unnamed_addr global float*, align 8 define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) { -; CHECK-P8-LABEL: testExpandPostRAPseudo: -; CHECK-P8: lxsiwax 34, 0, 3 -; CHECK-P8-NEXT: xxspltw 34, 34, 1 -; CHECK-P8-NEXT: stvx 2, 0, 4 -; CHECK-P8: #APP -; CHECK-P8-NEXT: #Clobber Rigisters -; CHECK-P8-NEXT: #NO_APP -; CHECK-P8-NEXT: lis 4, 1024 -; CHECK-P8-NEXT: lfiwax 0, 0, 3 -; CHECK-P8: stfsx 0, 3, 4 -; CHECK-P8-NEXT: blr - -; CHECK-P9-LABEL: testExpandPostRAPseudo: -; CHECK-P9: lxvwsx 0, 0, 3 -; CHECK-P9: stxvx 0, 0, 4 -; CHECK-P9: #APP -; CHECK-P9-NEXT: #Clobber Rigisters -; CHECK-P9-NEXT: #NO_APP -; CHECK-P9-NEXT: lis 4, 1024 -; CHECK-P9-NEXT: lfiwax 0, 0, 3 -; CHECK-P9: stfsx 0, 3, 4 -; CHECK-P9-NEXT: blr - +; CHECK-P8-LABEL: testExpandPostRAPseudo: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-P8-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-P8-NEXT: stfd f14, -144(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P8-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P8-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: xxspltw v2, vs0, 3 +; CHECK-P8-NEXT: stfd f25, -56(r1) # 8-byte Folded 
Spill +; CHECK-P8-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: stvx v2, 0, r4 +; CHECK-P8-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: lis r4, 1024 +; CHECK-P8-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-P8-NEXT: #APP +; CHECK-P8-NEXT: #Clobber Rigisters +; CHECK-P8-NEXT: #NO_APP +; CHECK-P8-NEXT: lfiwax f0, 0, r3 +; CHECK-P8-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-P8-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-P8-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: xscvsxdsp f0, f0 +; CHECK-P8-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload +; CHECK-P8-NEXT: stfsx f0, r3, r4 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: testExpandPostRAPseudo: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-P9-NEXT: addis r4, r2, .LC0@toc@ha +; CHECK-P9-NEXT: stfd f14, -144(r1) # 8-byte 
Folded Spill +; CHECK-P9-NEXT: stfd f15, -136(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f19, -104(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f20, -96(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-P9-NEXT: stfd f21, -88(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f22, -80(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f23, -72(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f27, -40(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-P9-NEXT: xxspltw vs0, vs0, 3 +; CHECK-P9-NEXT: stxvx vs0, 0, r4 +; CHECK-P9-NEXT: #APP +; CHECK-P9-NEXT: #Clobber Rigisters +; CHECK-P9-NEXT: #NO_APP +; CHECK-P9-NEXT: lfiwax f0, 0, r3 +; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-P9-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f29, -24(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f28, -32(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f27, -40(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f26, -48(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f25, -56(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-P9-NEXT: lfd f24, -64(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f23, -72(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f22, -80(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f21, -88(r1) # 8-byte Folded Reload 
+; CHECK-P9-NEXT: lfd f20, -96(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f19, -104(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f18, -112(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f17, -120(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f16, -128(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f15, -136(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lfd f14, -144(r1) # 8-byte Folded Reload +; CHECK-P9-NEXT: lis r4, 1024 +; CHECK-P9-NEXT: xscvsxdsp f0, f0 +; CHECK-P9-NEXT: ld r3, 0(r3) +; CHECK-P9-NEXT: stfsx f0, r3, r4 +; CHECK-P9-NEXT: blr entry: %0 = load i32, i32* %ptr, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 Index: llvm/test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -109,8 +109,8 @@ ;vector int spltRegVali(int val) { // ; return (vector int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // ;vector int spltMemVali(int *ptr) { // ; return (vector int)*ptr; // ;} // @@ -286,8 +286,8 @@ ;vector unsigned int spltRegValui(unsigned int val) { // ; return (vector unsigned int) val; // ;} // -;// P8: lxsiwax, xxspltw // -;// P9: lxvwsx // +;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // +;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw // ;vector unsigned int spltMemValui(unsigned int *ptr) { // ; return (vector unsigned int)*ptr; // ;} // @@ -1205,15 +1205,21 @@ ; P9LE-LABEL: spltMemVali ; P8BE-LABEL: spltMemVali ; P8LE-LABEL: spltMemVali -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx f0, 0, r3 +; P9BE: xxsldwi vs0, f0, f0, 1 +; P9BE: xxspltw v2, vs0, 0 ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx f0, 0, r3 +; P9LE: xxpermdi vs0, f0, f0, 2 +; 
P9LE: xxspltw v2, vs0, 3 ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx f0, 0, r3 +; P8BE: xxsldwi vs0, f0, f0, 1 +; P8BE: xxspltw v2, vs0, 0 ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx f0, 0, r3 +; P8LE: xxpermdi vs0, f0, f0, 2 +; P8LE: xxspltw v2, vs0, 3 ; P8LE: blr } @@ -2365,15 +2371,21 @@ ; P9LE-LABEL: spltMemValui ; P8BE-LABEL: spltMemValui ; P8LE-LABEL: spltMemValui -; P9BE: lxvwsx v2, 0, r3 +; P9BE: lfiwzx f0, 0, r3 +; P9BE: xxsldwi vs0, f0, f0, 1 +; P9BE: xxspltw v2, vs0, 0 ; P9BE: blr -; P9LE: lxvwsx v2, 0, r3 +; P9LE: lfiwzx f0, 0, r3 +; P9LE: xxpermdi vs0, f0, f0, 2 +; P9LE: xxspltw v2, vs0, 3 ; P9LE: blr -; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8BE: lfiwzx f0, 0, r3 +; P8BE: xxsldwi vs0, f0, f0, 1 +; P8BE: xxspltw v2, vs0, 0 ; P8BE: blr -; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3 -; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1 +; P8LE: lfiwzx f0, 0, r3 +; P8LE: xxpermdi vs0, f0, f0, 2 +; P8LE: xxspltw v2, vs0, 3 ; P8LE: blr } Index: llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll =================================================================== --- llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll +++ llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll @@ -1,15 +1,27 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck --check-prefix=CHECK-LE \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names | FileCheck \ ; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s 
define <16 x i8> @test(i32* %s, i32* %t) { +; CHECK-LE-LABEL: test: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-LE-NEXT: xxspltw v2, vs0, 3 +; CHECK-LE-NEXT: blr + +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-NEXT: xxspltw v2, vs0, 0 +; CHECK-NEXT: blr entry: %0 = bitcast i32* %s to <4 x i8>* %1 = load <4 x i8>, <4 x i8>* %0, align 4 %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> ret <16 x i8> %2 -; CHECK-LABEL: test -; CHECK: lxsiwax 34, 0, 3 -; CHECK: xxspltw 34, 34, 1 } Index: llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll =================================================================== --- llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll +++ llvm/test/CodeGen/PowerPC/power9-moves-and-splats.ll @@ -1,47 +1,74 @@ -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s \ -; RUN: --check-prefix=CHECK-BE +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s --check-prefix=CHECK-BE @Globi = external global i32, align 4 @Globf = external global float, align 4 define <2 x i64> @test1(i64 %a, i64 %b) { +; CHECK-LABEL: test1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mtvsrdd v2, r4, r3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test1: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 +; CHECK-BE-NEXT: blr entry: ; The FIXME below is due to the lowering for BUILD_VECTOR needing a re-vamp ; which will happen in a subsequent patch. 
-; CHECK-LABEL: test1 -; CHECK: mtvsrdd 34, 4, 3 -; CHECK-BE-LABEL: test1 -; CHECK-BE: mtvsrdd 34, 3, 4 %vecins = insertelement <2 x i64> undef, i64 %a, i32 0 %vecins1 = insertelement <2 x i64> %vecins, i64 %b, i32 1 ret <2 x i64> %vecins1 } define i64 @test2(<2 x i64> %a) { +; CHECK-LABEL: test2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrld r3, v2 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test2: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mfvsrd r3, v2 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test2 -; CHECK: mfvsrld 3, 34 %0 = extractelement <2 x i64> %a, i32 0 ret i64 %0 } define i64 @test3(<2 x i64> %a) { +; CHECK-LABEL: test3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mfvsrd r3, v2 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mfvsrld r3, v2 +; CHECK-BE-NEXT: blr entry: -; CHECK-BE-LABEL: test3 -; CHECK-BE: mfvsrld 3, 34 %0 = extractelement <2 x i64> %a, i32 1 ret i64 %0 } define <4 x i32> @test4(i32* nocapture readonly %in) { +; CHECK-LABEL: test4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test4 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test4 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load i32, i32* %in, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -49,13 +76,20 @@ } define <4 x float> @test5(float* nocapture readonly %in) { +; CHECK-LABEL: test5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; 
CHECK-BE-LABEL: test5: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test5 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test5 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load float, float* %in, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer @@ -63,17 +97,24 @@ } define <4 x i32> @test6() { +; CHECK-LABEL: test6: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test6: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis r3, r2, .LC0@toc@ha +; CHECK-BE-NEXT: ld r3, .LC0@toc@l(r3) +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test6 -; CHECK: addis -; CHECK: ld [[TOC:[0-9]+]], .LC0 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test6 -; CHECK-BE: addis -; CHECK-BE: ld [[TOC:[0-9]+]], .LC0 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load i32, i32* @Globi, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer @@ -81,17 +122,24 @@ } define <4 x float> @test7() { +; CHECK-LABEL: test7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r3, r2, .LC1@toc@ha +; CHECK-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis r3, 
r2, .LC1@toc@ha +; CHECK-BE-NEXT: ld r3, .LC1@toc@l(r3) +; CHECK-BE-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-NEXT: xxspltw v2, vs0, 0 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test7 -; CHECK: addis -; CHECK: ld [[TOC:[0-9]+]], .LC1 -; CHECK: lxvwsx 34, 0, 3 -; CHECK-NOT: xxspltw -; CHECK-BE-LABEL: test7 -; CHECK-BE: addis -; CHECK-BE: ld [[TOC:[0-9]+]], .LC1 -; CHECK-BE: lxvwsx 34, 0, 3 -; CHECK-BE-NOT: xxspltw %0 = load float, float* @Globf, align 4 %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0 %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer @@ -99,76 +147,120 @@ } define <16 x i8> @test8() { +; CHECK-LABEL: test8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlxor v2, v2, v2 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxlxor v2, v2, v2 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test8 -; CHECK: xxlxor 34, 34, 34 -; CHECK-BE-LABEL: test8 -; CHECK-BE: xxlxor 34, 34, 34 ret <16 x i8> zeroinitializer } define <16 x i8> @test9() { +; CHECK-LABEL: test9: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 1 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test9: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 1 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test9 -; CHECK: xxspltib 34, 1 -; CHECK-BE-LABEL: test9 -; CHECK-BE: xxspltib 34, 1 ret <16 x i8> } define <16 x i8> @test10() { +; CHECK-LABEL: test10: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 127 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test10: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 127 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test10 -; CHECK: xxspltib 34, 127 -; CHECK-BE-LABEL: test10 -; CHECK-BE: xxspltib 34, 127 ret <16 x i8> } define <16 x i8> @test11() { +; CHECK-LABEL: test11: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 128 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test11: +; 
CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 128 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test11 -; CHECK: xxspltib 34, 128 -; CHECK-BE-LABEL: test11 -; CHECK-BE: xxspltib 34, 128 ret <16 x i8> } define <16 x i8> @test12() { +; CHECK-LABEL: test12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 255 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test12: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 255 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test12 -; CHECK: xxspltib 34, 255 -; CHECK-BE-LABEL: test12 -; CHECK-BE: xxspltib 34, 255 ret <16 x i8> } define <16 x i8> @test13() { +; CHECK-LABEL: test13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 129 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test13: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 129 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test13 -; CHECK: xxspltib 34, 129 -; CHECK-BE-LABEL: test13 -; CHECK-BE: xxspltib 34, 129 ret <16 x i8> } define <16 x i8> @test13E127() { +; CHECK-LABEL: test13E127: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxspltib v2, 200 +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test13E127: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: xxspltib v2, 200 +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test13E127 -; CHECK: xxspltib 34, 200 -; CHECK-BE-LABEL: test13E127 -; CHECK-BE: xxspltib 34, 200 ret <16 x i8> } define <4 x i32> @test14(<4 x i32> %a, i32* nocapture readonly %b) { +; CHECK-LABEL: test14: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz r3, 0(r5) +; CHECK-NEXT: mtvsrws v2, r3 +; CHECK-NEXT: addi r3, r3, 5 +; CHECK-NEXT: stw r3, 0(r5) +; CHECK-NEXT: blr + +; CHECK-BE-LABEL: test14: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: lwz r3, 0(r5) +; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: addi r3, r3, 5 +; CHECK-BE-NEXT: stw r3, 0(r5) +; CHECK-BE-NEXT: blr entry: -; CHECK-LABEL: test14 -; CHECK: lwz [[LD:[0-9]+]], -; CHECK: mtvsrws 34, [[LD]] -; CHECK-BE-LABEL: test14 -; CHECK-BE: lwz [[LD:[0-9]+]], -; CHECK-BE: 
mtvsrws 34, [[LD]] %0 = load i32, i32* %b, align 4 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer Index: llvm/test/CodeGen/PowerPC/qpx-load-splat.ll =================================================================== --- llvm/test/CodeGen/PowerPC/qpx-load-splat.ll +++ llvm/test/CodeGen/PowerPC/qpx-load-splat.ll @@ -1,35 +1,44 @@ -; RUN: llc -verify-machineinstrs < %s | FileCheck %s -target datalayout = "E-m:e-i64:64-n32:64" -target triple = "powerpc64-bgq-linux" +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -verify-machineinstrs < %s | FileCheck %s ; Function Attrs: norecurse nounwind readonly define <4 x double> @foo(double* nocapture readonly %a) #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lxvdsx v2, 0, r3 +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %0 = load double, double* %a, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foo -; CHECK: lfd 1, 0(3) -; CHECK: blr } define <4 x double> @foox(double* nocapture readonly %a, i64 %idx) #0 { +; CHECK-LABEL: foox: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: lxvdsx v2, r3, r4 +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %p = getelementptr double, double* %a, i64 %idx %0 = load double, double* %p, align 8 %vecinit.i = insertelement <4 x double> undef, double %0, i32 0 %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foox -; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdx 1, 3, [[REG1]] -; CHECK: blr } define <4 x double> @fooxu(double* nocapture readonly %a, i64 %idx, double** %pptr) #0 { +; CHECK-LABEL: fooxu: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 3 +; CHECK-NEXT: lfdux f0, r3, r4 +; CHECK-NEXT: xxspltd v2, vs0, 0 +; CHECK-NEXT: std r3, 0(r5) +; CHECK-NEXT: vmr v3, v2 +; CHECK-NEXT: blr entry: %p = getelementptr double, double* %a, i64 %idx %0 = load double, double* %p, align 8 @@ -37,39 +46,36 @@ %shuffle.i = shufflevector <4 x double> %vecinit.i, <4 x double> undef, <4 x i32> zeroinitializer store double* %p, double** %pptr, align 8 ret <4 x double> %shuffle.i - -; CHECK-LABEL: @foox -; CHECK: sldi [[REG1:[0-9]+]], 4, 3 -; CHECK: lfdux 1, 3, [[REG1]] -; CHECK: std 3, 0(5) -; CHECK: blr } define <4 x float> @foof(float* nocapture readonly %a) #0 { +; CHECK-LABEL: foof: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lfiwzx f0, 0, r3 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr entry: %0 = load float, float* %a, align 4 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %shuffle.i - -; CHECK-LABEL: @foof -; CHECK: lfs 1, 0(3) -; CHECK: blr } define <4 x float> @foofx(float* nocapture readonly %a, i64 %idx) #0 { +; CHECK-LABEL: foofx: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sldi r4, r4, 2 +; CHECK-NEXT: lfiwzx f0, r3, r4 +; CHECK-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-NEXT: xxspltw v2, vs0, 3 +; CHECK-NEXT: blr entry: %p = getelementptr float, float* %a, i64 %idx %0 = load float, float* %p, align 4 %vecinit.i = insertelement <4 x float> undef, float %0, i32 0 %shuffle.i = shufflevector <4 x float> %vecinit.i, <4 x float> undef, <4 x i32> zeroinitializer ret <4 x float> %shuffle.i - -; CHECK-LABEL: @foofx -; CHECK: sldi [[REG1:[0-9]+]], 4, 2 -; CHECK: lfsx 1, 3, [[REG1]] -; CHECK: blr } -attributes #0 = { norecurse nounwind readonly "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" } Index: 
llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_1.ll @@ -0,0 +1,292 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %0 = load i64, i64* %int64, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = 
load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i64* nocapture readonly %int64, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 3 +; P9LE-NEXT: lfdx f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 3 +; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 %idxprom + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i64* nocapture readonly %int64, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i64, i64* %int64, i64 1 + %0 = load i64, i64* %arrayidx, align 8 + %vecins = insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i64* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r5) +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr +entry: + %0 = load i64, i64* %ptr1, align 8 + %vecins =
insertelement <2 x i64> %vec, i64 %0, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f1(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxsdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxsdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load double, double* %f64, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f2(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lxsdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 8 +; P8BE-NEXT: lxsdx f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, 
i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f3(double* nocapture readonly %f64, <2 x double> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 3 +; P9LE-NEXT: lfdx f0, r3, r4 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 3 +; P9BE-NEXT: lfdx f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 3 +; P8LE-NEXT: lxsdx f0, r3, r4 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 3 +; P8BE-NEXT: lxsdx f0, r3, r4 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds double, double* %f64, i64 %idxprom + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f4(double* nocapture readonly %f64, <2 x double> %vec) { +; P9LE-LABEL: s2v_test_f4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 8(r3) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 8(r3) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 8 +; P8LE-NEXT: lxsdx f0, 0, r3 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 8 +; P8BE-NEXT: lxsdx f0, 0, r3 +; P8BE-NEXT: 
xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds double, double* %f64, i64 1 + %0 = load double, double* %arrayidx, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x double> @s2v_test_f5(<2 x double> %vec, double* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfd f0, 0(r5) +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfd f0, 0(r5) +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lxsdx f0, 0, r5 +; P8LE-NEXT: xxspltd vs0, vs0, 0 +; P8LE-NEXT: xxpermdi v2, v2, vs0, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lxsdx f0, 0, r5 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load double, double* %ptr1, align 8 + %vecins = insertelement <2 x double> %vec, double %0, i32 0 + ret <2 x double> %vecins +} + Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_2.ll @@ -0,0 +1,118 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr 
-ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +define void @test_liwzx1(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9LE-LABEL: test_liwzx1: +; P9LE: # %bb.0: +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lfiwzx f1, 0, r4 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P9LE-NEXT: xvaddsp vs0, vs0, vs1 +; P9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: blr + +; P9BE-LABEL: test_liwzx1: +; P9BE: # %bb.0: +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: lfiwzx f1, 0, r4 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P9BE-NEXT: xvaddsp vs0, vs0, vs1 +; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: blr + +; P8LE-LABEL: test_liwzx1: +; P8LE: # %bb.0: +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P8LE-NEXT: xvaddsp vs0, vs0, vs1 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8LE-NEXT: xscvspdpn f0, vs0 +; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: blr + +; P8BE-LABEL: test_liwzx1: +; P8BE: # %bb.0: +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P8BE-NEXT: xvaddsp vs0, vs0, vs1 +; P8BE-NEXT: xscvspdpn f0, vs0 +; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fadd <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret void +} + +define <1 x float>* @test_liwzx2(<1 x float>* %A, <1 x float>* %B, <1 x float>* %C) { +; P9LE-LABEL: test_liwzx2: +; P9LE: # %bb.0: +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lfiwzx f1, 0, r4 +; P9LE-NEXT: mr r3, r5 +; P9LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P9LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P9LE-NEXT: xvsubsp vs0, vs0, vs1 +; P9LE-NEXT: xxsldwi vs0, 
vs0, vs0, 3 +; P9LE-NEXT: xscvspdpn f0, vs0 +; P9LE-NEXT: stfs f0, 0(r5) +; P9LE-NEXT: blr + +; P9BE-LABEL: test_liwzx2: +; P9BE: # %bb.0: +; P9BE-NEXT: lfiwzx f0, 0, r3 +; P9BE-NEXT: lfiwzx f1, 0, r4 +; P9BE-NEXT: mr r3, r5 +; P9BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P9BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P9BE-NEXT: xvsubsp vs0, vs0, vs1 +; P9BE-NEXT: xscvspdpn f0, vs0 +; P9BE-NEXT: stfs f0, 0(r5) +; P9BE-NEXT: blr + +; P8LE-LABEL: test_liwzx2: +; P8LE: # %bb.0: +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: lfiwzx f1, 0, r4 +; P8LE-NEXT: mr r3, r5 +; P8LE-NEXT: xxpermdi vs0, f0, f0, 2 +; P8LE-NEXT: xxpermdi vs1, f1, f1, 2 +; P8LE-NEXT: xvsubsp vs0, vs0, vs1 +; P8LE-NEXT: xxsldwi vs0, vs0, vs0, 3 +; P8LE-NEXT: xscvspdpn f0, vs0 +; P8LE-NEXT: stfsx f0, 0, r5 +; P8LE-NEXT: blr + +; P8BE-LABEL: test_liwzx2: +; P8BE: # %bb.0: +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: lfiwzx f1, 0, r4 +; P8BE-NEXT: mr r3, r5 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE-NEXT: xxsldwi vs1, f1, f1, 1 +; P8BE-NEXT: xvsubsp vs0, vs0, vs1 +; P8BE-NEXT: xscvspdpn f0, vs0 +; P8BE-NEXT: stfsx f0, 0, r5 +; P8BE-NEXT: blr + %a = load <1 x float>, <1 x float>* %A + %b = load <1 x float>, <1 x float>* %B + %X = fsub <1 x float> %a, %b + store <1 x float> %X, <1 x float>* %C + ret <1 x float>* %C +} Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_3.ll @@ -0,0 +1,265 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | 
FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test1(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test1: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test1: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test1: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %int32, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test2(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test2: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addi r3, r3, 4 +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 
+; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test3(i32* nocapture readonly %int32, <2 x i64> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 2 +; P9LE-NEXT: lfiwax f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test3: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: sldi r4, r7, 2 +; P9BE-NEXT: lfiwax f0, r3, r4 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: lfiwax f0, r3, r4 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 2 +; P8BE-NEXT: lfiwax f0, r3, r4 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test4(i32* nocapture readonly %int32, <2 x i64> %vec) { +; P9LE-LABEL: s2v_test4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test4: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: addi r3, r3, 4 +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 
4 +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test5(<2 x i64> %vec, i32* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r5 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: xxpermdi v2, v2, v3, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test5: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r5 +; P9BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r5 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: xxpermdi v2, v2, v3, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r5 +; P8BE-NEXT: xxpermdi v2, vs0, v2, 1 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %conv = sext i32 %0 to i64 + %vecins = insertelement <2 x i64> %vec, i64 %conv, i32 0 + ret <2 x i64> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test6(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test6: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v2, f0, f0, 2 +; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test6: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test6: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v2, f0, f0, 2 +; 
P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test6: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + +; Function Attrs: norecurse nounwind readonly +define <2 x i64> @s2v_test7(i32* nocapture readonly %ptr) { +; P9LE-LABEL: s2v_test7: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwax f0, 0, r3 +; P9LE-NEXT: xxpermdi v2, f0, f0, 2 +; P9LE-NEXT: xxspltd v2, v2, 1 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test7: +; P9BE: # %bb.0: # %entry +; P9BE-NEXT: lfiwax f0, 0, r3 +; P9BE-NEXT: xxspltd v2, vs0, 0 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test7: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwax f0, 0, r3 +; P8LE-NEXT: xxpermdi v2, f0, f0, 2 +; P8LE-NEXT: xxspltd v2, v2, 1 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test7: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwax f0, 0, r3 +; P8BE-NEXT: xxspltd v2, vs0, 0 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %conv = sext i32 %0 to i64 + %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0 + %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %splat.splat +} + Index: llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -0,0 +1,341 @@ +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE +; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu 
< %s | FileCheck %s --check-prefix=P9BE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE +; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test1: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %int32, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test2: +; P8BE: # %bb.0: # %entry +; P8BE: addi r3, r3, 4 +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> 
%vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) { +; P8LE-LABEL: s2v_test3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r5, r7, 2 +; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; P8LE-NEXT: lfiwzx f0, r3, r5 +; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test3: +; P8BE: # %bb.0: # %entry +; P8BE: sldi r4, r7, 2 +; P8BE: lfiwzx f0, r3, r4 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) { +; P8LE-LABEL: s2v_test4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test4: +; P8BE: # %bb.0: # %entry +; P8BE: addi r3, r3, 4 +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1 + %0 = load i32, i32* %arrayidx, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) { +; P8LE-LABEL: s2v_test5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: 
lfiwzx f0, 0, r5 +; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test5: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r5 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load i32, i32* %ptr1, align 4 + %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0 + ret <4 x i32> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) { +; P8LE-LABEL: s2v_test_f1: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l +; P8LE-NEXT: lvx v4, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vperm v2, v3, v2, v4 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f1: +; P8BE: # %bb.0: # %entry +; P8BE: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi vs0, f0, f0, 1 +; P8BE: xxsldwi vs0, v2, vs0, 1 +; P8BE: xxsldwi v2, vs0, vs0, 3 +; P8BE-NEXT: blr +entry: + %0 = load float, float* %f64, align 4 + %vecins = insertelement <4 x float> %vec, float %0, i32 0 + ret <4 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) { +; P9LE-LABEL: s2v_test_f2: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f2: +; P9BE: # %bb.0: # %entry +; P9BE: addi r3, r3, 4 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f2: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: 
xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f2: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) { +; P9LE-LABEL: s2v_test_f3: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: sldi r4, r7, 2 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, r3, r4 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f3: +; P9BE: # %bb.0: # %entry +; P9BE: sldi r4, r7, 2 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, r3, r4 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f3: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: sldi r4, r7, 2 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, r3, r4 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f3: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: sldi r4, r7, 2 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, r3, r4 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %idxprom = sext i32 %Idx to i64 + %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f4(float* nocapture 
readonly %f64, <2 x float> %vec) { +; P9LE-LABEL: s2v_test_f4: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: addi r3, r3, 4 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f4: +; P9BE: # %bb.0: # %entry +; P9BE: addi r3, r3, 4 +; P9BE: xxspltw v2, v2, 1 +; P9BE: lfiwzx f0, 0, r3 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f4: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: addi r3, r3, 4 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: lfiwzx f0, 0, r3 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f4: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: addi r3, r3, 4 +; P8BE-NEXT: xxspltw v2, v2, 1 +; P8BE-NEXT: lfiwzx f0, 0, r3 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %arrayidx = getelementptr inbounds float, float* %f64, i64 1 + %0 = load float, float* %arrayidx, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + +; Function Attrs: norecurse nounwind readonly +define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) { +; P9LE-LABEL: s2v_test_f5: +; P9LE: # %bb.0: # %entry +; P9LE-NEXT: lfiwzx f0, 0, r5 +; P9LE-NEXT: xxspltw v2, v2, 2 +; P9LE-NEXT: xxpermdi v3, f0, f0, 2 +; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: blr + +; P9BE-LABEL: s2v_test_f5: +; P9BE: # %bb.0: # %entry +; P9BE: lfiwzx f0, 0, r5 +; P9BE: xxspltw v2, v2, 1 +; P9BE-NEXT: xxsldwi v3, f0, f0, 1 +; P9BE: vmrghw v2, v3, v2 +; P9BE-NEXT: blr + +; P8LE-LABEL: s2v_test_f5: +; P8LE: # %bb.0: # %entry +; P8LE-NEXT: lfiwzx f0, 0, r5 +; P8LE-NEXT: xxspltw v2, v2, 2 +; P8LE-NEXT: xxpermdi v3, f0, f0, 2 +; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: blr + +; P8BE-LABEL: s2v_test_f5: +; P8BE: # %bb.0: # %entry +; P8BE-NEXT: lfiwzx f0, 0, r5 +; P8BE-NEXT: 
xxspltw v2, v2, 1 +; P8BE-NEXT: xxsldwi v3, f0, f0, 1 +; P8BE-NEXT: vmrghw v2, v3, v2 +; P8BE-NEXT: blr +entry: + %0 = load float, float* %ptr1, align 8 + %vecins = insertelement <2 x float> %vec, float %0, i32 0 + ret <2 x float> %vecins +} + Index: llvm/test/CodeGen/PowerPC/swaps-le-6.ll =================================================================== --- llvm/test/CodeGen/PowerPC/swaps-le-6.ll +++ llvm/test/CodeGen/PowerPC/swaps-le-6.ll @@ -1,12 +1,15 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -O3 < %s | FileCheck %s ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=CHECK-P9 \ ; RUN: --implicit-check-not xxswapd ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \ -; RUN: -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \ +; RUN: -mattr=-power9-vector < %s | FileCheck %s ; These tests verify that VSX swap optimization works when loading a scalar ; into a vector register. 
@@ -17,6 +20,31 @@ @y = global double 1.780000e+00, align 8 define void @bar0() { +; CHECK-LABEL: bar0: +; CHECK: # %bb.0: # %entry +; CHECK: addis r3, r2, .LC0@toc@ha +; CHECK: addis r4, r2, .LC1@toc@ha +; CHECK: ld r3, .LC0@toc@l(r3) +; CHECK: addis r3, r2, .LC2@toc@ha +; CHECK: ld r3, .LC2@toc@l(r3) +; CHECK: xxpermdi vs0, vs0, vs1, 1 +; CHECK: stxvd2x vs0, 0, r3 +; CHECK: blr +; +; CHECK-P9-LABEL: bar0: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: addis r3, r2, .LC0@toc@ha +; CHECK-P9: addis r4, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC0@toc@l(r3) +; CHECK-P9: ld r4, .LC1@toc@l(r4) +; CHECK-P9: lfd f0, 0(r3) +; CHECK-P9: lxvx vs1, 0, r4 +; CHECK-P9: addis r3, r2, .LC2@toc@ha +; CHECK-P9: ld r3, .LC2@toc@l(r3) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxpermdi vs0, vs1, vs0, 1 +; CHECK-P9: stxvx vs0, 0, r3 +; CHECK-P9: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 @@ -25,21 +53,32 @@ ret void } -; CHECK-LABEL: @bar0 -; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lxsdx [[REG2:[0-9]+]] -; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 -; CHECK: stxvd2x [[REG5]] - -; CHECK-P9-LABEL: @bar0 -; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] -; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1 -; CHECK-P9: stxvx [[REG5]] - define void @bar1() { +; CHECK-LABEL: bar1: +; CHECK: # %bb.0: # %entry +; CHECK: addis r3, r2, .LC0@toc@ha +; CHECK: addis r4, r2, .LC1@toc@ha +; CHECK: ld r3, .LC0@toc@l(r3) +; CHECK: addis r3, r2, .LC2@toc@ha +; CHECK: ld r3, .LC2@toc@l(r3) +; CHECK: xxmrghd vs0, vs1, vs0 +; CHECK: stxvd2x vs0, 0, r3 +; CHECK: blr +; +; CHECK-P9-LABEL: bar1: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9: addis r3, r2, .LC0@toc@ha +; CHECK-P9: addis r4, r2, .LC1@toc@ha +; CHECK-P9: ld r3, .LC0@toc@l(r3) +; CHECK-P9: ld r4, .LC1@toc@l(r4) +; CHECK-P9: lfd f0, 0(r3) 
+; CHECK-P9: lxvx vs1, 0, r4 +; CHECK-P9: addis r3, r2, .LC2@toc@ha +; CHECK-P9: ld r3, .LC2@toc@l(r3) +; CHECK-P9: xxpermdi vs0, f0, f0, 2 +; CHECK-P9: xxmrgld vs0, vs0, vs1 +; CHECK-P9: stxvx vs0, 0, r3 +; CHECK-P9: blr entry: %0 = load <2 x double>, <2 x double>* @x, align 16 %1 = load double, double* @y, align 8 @@ -48,17 +87,3 @@ ret void } -; CHECK-LABEL: @bar1 -; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lxsdx [[REG2:[0-9]+]] -; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] -; CHECK: stxvd2x [[REG5]] - -; CHECK-P9-LABEL: @bar1 -; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] -; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 -; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]] -; CHECK-P9: stxvx [[REG5]] - Index: llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -1,74 +1,98 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \ +; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @testi0(<2 
x double>* %p1, double* %p2) { +; CHECK-LABEL: testi0: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: lxsdx f1, 0, r4 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: xxspltd vs1, vs1, 0 +; CHECK-NEXT: xxpermdi v2, vs0, vs1, 1 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: testi0: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1 +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 %r = insertelement <2 x double> %v, double %s, i32 0 ret <2 x double> %r -; CHECK-LABEL: testi0 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: lxsdx 1, 0, 4 -; CHECK-DAG: xxspltd 1, 1, 0 -; CHECK-DAG: xxswapd 0, 0 -; CHECK: xxpermdi 34, 0, 1, 1 -; CHECK-P9-LABEL: testi0 -; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) -; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxpermdi 34, [[REG2]], [[REG3]], 1 } define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { +; CHECK-LABEL: testi1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: lxsdx f1, 0, r4 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: xxspltd vs1, vs1, 0 +; CHECK-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: testi1: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f0, 0(r4) +; CHECK-P9-NEXT: lxv vs1, 0(r3) +; CHECK-P9-NEXT: xxpermdi vs0, f0, f0, 2 +; CHECK-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %s = load double, double* %p2 %r = insertelement <2 x double> %v, double %s, i32 1 ret <2 x double> %r -; CHECK-LABEL: testi1 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: lxsdx 1, 0, 4 -; CHECK-DAG: xxspltd 1, 1, 0 -; CHECK-DAG: xxswapd 0, 0 -; CHECK: xxmrgld 34, 1, 0 -; CHECK-P9-LABEL: testi1 -; CHECK-P9: lfd [[REG1:[0-9]+]], 0(4) -; CHECK-P9: lxv [[REG2:[0-9]+]], 0(3) -; CHECK-P9: xxspltd [[REG3:[0-9]+]], [[REG1]], 0 -; CHECK-P9: xxmrgld 34, [[REG3]], 
[[REG2]] } define double @teste0(<2 x double>* %p1) { +; CHECK-LABEL: teste0: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: teste0: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f1, 0(r3) +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %r = extractelement <2 x double> %v, i32 0 ret double %r -; CHECK-LABEL: teste0 -; CHECK: lxvd2x 1, 0, 3 -; CHECK-P9-LABEL: teste0 -; CHECK-P9: lfd 1, 0(3) } define double @teste1(<2 x double>* %p1) { +; CHECK-LABEL: teste1: +; CHECK: # %bb.0: +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd vs1, vs0 +; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1 +; CHECK-NEXT: blr +; +; CHECK-P9-LABEL: teste1: +; CHECK-P9: # %bb.0: +; CHECK-P9-NEXT: lfd f1, 8(r3) +; CHECK-P9-NEXT: blr %v = load <2 x double>, <2 x double>* %p1 %r = extractelement <2 x double> %v, i32 1 ret double %r -; CHECK-LABEL: teste1 -; CHECK: lxvd2x 0, 0, 3 -; CHECK: xxswapd 1, 0 -; CHECK-P9-LABEL: teste1 -; CHECK-P9: lfd 1, 8(3) }