Index: llvm/lib/Target/PowerPC/PPCInstrVSX.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2388,6 +2388,12 @@
 def AlignValues {
   dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
   dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
+  // Word loads (LIWZX) from $A..$D copied into VSRC; consumed by the v4f32
+  // build_vector-of-loads pattern.
+  dag LD32A = (COPY_TO_REGCLASS (LIWZX xoaddr:$A), VSRC);
+  dag LD32B = (COPY_TO_REGCLASS (LIWZX xoaddr:$B), VSRC);
+  dag LD32C = (COPY_TO_REGCLASS (LIWZX xoaddr:$C), VSRC);
+  dag LD32D = (COPY_TO_REGCLASS (LIWZX xoaddr:$D), VSRC);
 }
 
 // The following VSX instructions were introduced in Power ISA 3.0
@@ -3975,7 +3981,19 @@
             (v2f64 (XXPERMDI
                       (COPY_TO_REGCLASS $B, VSRC),
                       (COPY_TO_REGCLASS $A, VSRC), 0))>;
-
+
+  // Build a v4f32 from four f32 loads: load each word with LIWZX, merge the
+  // pairs (D,C) and (B,A) with XXMRGHW, then combine both results with
+  // XXPERMDI (immediate 3).
+  // NOTE(review): the reversed operand order looks little-endian specific and
+  // the enclosing predicate block is not visible in this diff - confirm.
+  def : Pat<(v4f32 (build_vector (f32 (load xoaddr:$A)),
+                                 (f32 (load xoaddr:$B)),
+                                 (f32 (load xoaddr:$C)),
+                                 (f32 (load xoaddr:$D)))),
+            (v4f32 (XXPERMDI (XXMRGHW AlignValues.LD32D, AlignValues.LD32C),
+                             (XXMRGHW AlignValues.LD32B, AlignValues.LD32A), 3))>;
+
   def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
             (VMRGEW MrgFP.AC, MrgFP.BD)>;
   def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
Index: llvm/test/CodeGen/PowerPC/float-vector-gather.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/float-vector-gather.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
+; RUN:   | FileCheck %s
+; Verify that a <4 x float> assembled from four scalar float loads is lowered
+; to four lfiwzx loads merged with xxmrghw/xxmrgld.
+define dso_local <4 x float> @vector_gatherf(float* nocapture readonly %a,
+float* nocapture readonly %b, float* nocapture readonly %c,
+float* nocapture readonly %d) {
+; CHECK-LABEL: vector_gatherf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lfiwzx f0, 0, r6
+; CHECK-NEXT:    lfiwzx f1, 0, r5
+; CHECK-NEXT:    xxmrghw vs0, vs0, vs1
+; CHECK-NEXT:    lfiwzx f1, 0, r4
+; CHECK-NEXT:    lfiwzx f2, 0, r3
+; CHECK-NEXT:    xxmrghw vs1, vs1, vs2
+; CHECK-NEXT:    xxmrgld v2, vs0, vs1
+; CHECK-NEXT:    blr
+entry:
+  %0 = load float, float* %a, align 4
+  %vecinit = insertelement <4 x float> undef, float %0, i32 0
+  %1 = load float, float* %b, align 4
+  %vecinit1 = insertelement <4 x float> %vecinit, float %1, i32 1
+  %2 = load float, float* %c, align 4
+  %vecinit2 = insertelement <4 x float> %vecinit1, float %2, i32 2
+  %3 = load float, float* %d, align 4
+  %vecinit3 = insertelement <4 x float> %vecinit2, float %3, i32 3
+  ret <4 x float> %vecinit3
+}
+