diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -408,7 +408,7 @@ // to speed up scalar BSWAP64. // CTPOP or CTTZ were introduced in P8/P9 respectively setOperationAction(ISD::BSWAP, MVT::i32 , Expand); - if (Subtarget.hasP9Vector()) + if (Subtarget.hasP9Vector() && Subtarget.isPPC64()) setOperationAction(ISD::BSWAP, MVT::i64 , Custom); else setOperationAction(ISD::BSWAP, MVT::i64 , Expand); @@ -10254,6 +10254,8 @@ // Lower scalar BSWAP64 to xxbrd. SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + if (!Subtarget.isPPC64()) + return Op; // MTVSRDD Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0), Op.getOperand(0)); diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -4065,10 +4065,7 @@ v8i16, ScalarLoads.Li16, (VSPLTHs 3, (LXSIHZX xoaddr:$src)), (SUBREG_TO_REG (i64 1), (LXSIHZX xoaddr:$src), sub_64)>; -} // HasVSX, HasP9Vector, NoP10Vector -// Big endian 64Bit Power9 subtarget. -let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in { def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))), (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>; def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))), @@ -4153,7 +4150,10 @@ (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 6)), VSRC), xoaddr:$dst)>; def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst), (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC), xoaddr:$dst)>; +} // HasVSX, HasP9Vector, IsBigEndian +// Big endian 64Bit Power9 subtarget. 
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in { def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))), (v2i64 (SUBREG_TO_REG (i64 1), (DFLOADf64 iaddrX4:$src), sub_64))>; def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))), diff --git a/llvm/test/CodeGen/PowerPC/aix-insert-extract.ll b/llvm/test/CodeGen/PowerPC/aix-insert-extract.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-insert-extract.ll @@ -0,0 +1,808 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc-ibm-aix-xcoff -vec-extabi \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-32 + +define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x 
float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; 
CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_(<4 
x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x 
i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, 
<4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define float @_Z13testUiToFpExtILj0EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj0EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 0 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj0EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj1EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj1EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 4 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj1EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 1 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj2EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj2EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 8 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj2EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 2 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj3EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj3EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 12 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj3EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 3 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +; Verify we generate optimal code for unsigned vector int elem extract followed +; by conversion 
to double + +define double @conv2dlbTestui0(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui0 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 0 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui0 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 0 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui1(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui1 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 4 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui1 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 1 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui2(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui2 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 8 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui2 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 2 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui3(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui3 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 12 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui3 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 3 + %1 = uitofp i32 %0 to double + ret double %1 +} + +; verify we don't crash for variable elem extract +define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { +entry: + %vecext = extractelement <4 x i32> %a, i32 %elem + %conv = uitofp i32 %vecext to double + ret double %conv +} + +define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 
0, 0 + %vecins = insertelement <4 x float> %a, float %b, i32 0 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = insertelement <4 x float> %a, float %b, i32 1 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = insertelement <4 x float> %a, float %b, i32 2 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = insertelement <4 x float> %a, float %b, i32 3 + ret <4 x float> %vecins +} + +define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 0 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 1 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 2 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; 
CHECK-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 3 + ret <4 x i32> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x 
float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: 
_Z7testInsILj3ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + 
+define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x i32> %b, <4 
x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} +define <4 x float> @testSameVecEl0BE(<4 x float> %a) { +entry: +; CHECK-LABEL: testSameVecEl0BE +; CHECK: xxinsertw 34, 34, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 
x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl2BE(<4 x float> %a) { +entry: +; CHECK-LABEL: testSameVecEl2BE +; CHECK: xxinsertw 34, 34, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl3BE(<4 x float> %a) { +entry: +; CHECK-LABEL: testSameVecEl3BE +; CHECK: xxinsertw 34, 34, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { +entry: +; CHECK-LABEL: insertVarF +; CHECK: stfsx 1, +; CHECK: lxv + %vecins = insertelement <4 x float> %a, float %f, i32 %el + ret <4 x float> %vecins +} +define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) { +entry: +; CHECK-LABEL: insertVarI +; CHECK: stwx +; CHECK: lxv + %vecins = insertelement <4 x i32> %a, i32 %i, i32 %el + ret <4 x i32> %vecins +} diff --git a/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll b/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-p9-insert-extract.ll @@ -0,0 +1,2893 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-64,CHECK-64-OPT %s +; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK-64,CHECK-64-O0 %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-32,CHECK-32-OPT +; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-32,CHECK-32-O0 + +; The following testcases take one halfword element from the second vector and +; inserts it at various locations in the first vector +define <8 x i16> 
@shuffle_vector_halfword_0_8(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_0_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-64-NEXT: vinserth 2, 3, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_0_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-32-NEXT: vinserth 2, 3, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_1_15(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_1_15: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-64-NEXT: vinserth 2, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_1_15: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-32-NEXT: vinserth 2, 3, 2 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_2_9(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_2_9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-64-NEXT: vinserth 2, 3, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_2_9: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-32-NEXT: vinserth 2, 3, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_3_13(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_3_13: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-64-NEXT: vinserth 2, 3, 6 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_3_13: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-32-NEXT: vinserth 2, 3, 6 +; CHECK-32-NEXT: blr +entry: 
+ %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_4_10(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_4_10: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-64-NEXT: vinserth 2, 3, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_4_10: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-32-NEXT: vinserth 2, 3, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_5_14(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_5_14: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-64-NEXT: vinserth 2, 3, 10 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_5_14: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-32-NEXT: vinserth 2, 3, 10 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_6_11(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_6_11: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinserth 2, 3, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_6_11: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinserth 2, 3, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_7_12(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-LABEL: shuffle_vector_halfword_7_12: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-64-NEXT: vinserth 2, 3, 14 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_7_12: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-32-NEXT: 
vinserth 2, 3, 14 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_8_1: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 0 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_8_1: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-64-O0-NEXT: vinserth 2, 3, 0 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_8_1: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 0 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_8_1: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-32-O0-NEXT: vinserth 2, 3, 0 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +; The following testcases take one halfword element from the first vector and +; inserts it at various locations in the second vector +define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_9_7: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 2 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_9_7: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: 
stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-64-O0-NEXT: vinserth 2, 3, 2 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_9_7: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 2 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_9_7: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-32-O0-NEXT: vinserth 2, 3, 2 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_10_4: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 4 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_10_4: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-64-O0-NEXT: vinserth 2, 3, 4 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_10_4: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 4 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_10_4: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded 
Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-32-O0-NEXT: vinserth 2, 3, 4 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_11_2: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 6 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_11_2: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-64-O0-NEXT: vinserth 2, 3, 6 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_11_2: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 6 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_11_2: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-32-O0-NEXT: vinserth 2, 3, 6 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_12_6: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 8 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_12_6: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: 
vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-64-O0-NEXT: vinserth 2, 3, 8 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_12_6: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 8 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_12_6: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-32-O0-NEXT: vinserth 2, 3, 8 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_13_3: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vinserth 3, 2, 10 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_13_3: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vinserth 2, 3, 10 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_13_3: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vinserth 3, 2, 10 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_13_3: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vinserth 2, 3, 10 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + 
+define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_14_5: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 12 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_14_5: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-64-O0-NEXT: vinserth 2, 3, 12 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_halfword_14_5: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 12 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_14_5: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-32-O0-NEXT: vinserth 2, 3, 12 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_halfword_15_0: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-64-OPT-NEXT: vinserth 3, 2, 14 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_halfword_15_0: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-64-O0-NEXT: vinserth 2, 3, 14 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: 
shuffle_vector_halfword_15_0: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-32-OPT-NEXT: vinserth 3, 2, 14 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_halfword_15_0: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-32-O0-NEXT: vinserth 2, 3, 14 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> + ret <8 x i16> %vecins +} + +; The following testcases use the same vector in both arguments of the +; shufflevector. If halfword element 3 in BE mode(or 4 in LE mode) is the one +; we're attempting to insert, then we can use the vector insert instruction +define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinserth 2, 2, 14 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_0_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI16_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI16_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_0_4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C0(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_0_4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C0(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI17_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI17_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; 
CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_1_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinserth 2, 2, 2 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_1_3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinserth 2, 2, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_1_3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinserth 2, 2, 2 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI18_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI18_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_2_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinserth 2, 2, 4 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_2_3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinserth 2, 2, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_2_3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinserth 2, 2, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinserth 2, 2, 8 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_3_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI19_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI19_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_3_4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C1(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_3_4: +; CHECK-32: # 
%bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C1(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI20_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI20_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_4_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinserth 2, 2, 8 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_4_3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinserth 2, 2, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_4_3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinserth 2, 2, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI21_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI21_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_5_3: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinserth 2, 2, 10 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_5_3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinserth 2, 2, 10 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_5_3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinserth 2, 2, 10 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinserth 2, 2, 2 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_6_4: +; CHECK-BE: # %bb.0: # 
%entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI22_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI22_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_6_4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C2(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_6_4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C2(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinserth 2, 2, 0 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_halfword_7_4: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI23_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI23_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_halfword_7_4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C3(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_halfword_7_4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C3(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> + ret <8 x i16> %vecins +} + +; The following testcases take one byte element from the second vector and +; inserts it at various locations in the first vector +define <16 x i8> @shuffle_vector_byte_0_16(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-NEXT: vinsertb 2, 3, 15 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_0_16: +; CHECK-BE: # %bb.0: # %entry 
+; CHECK-BE-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-BE-NEXT: vinsertb 2, 3, 0 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_0_16: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-64-NEXT: vinsertb 2, 3, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_0_16: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-32-NEXT: vinsertb 2, 3, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_1_25(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-NEXT: vinsertb 2, 3, 14 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_1_25: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-BE-NEXT: vinsertb 2, 3, 1 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_1_25: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-64-NEXT: vinsertb 2, 3, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_1_25: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-32-NEXT: vinsertb 2, 3, 1 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_2_18(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-NEXT: vinsertb 2, 3, 13 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_2_18: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-BE-NEXT: vinsertb 2, 3, 2 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_2_18: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-64-NEXT: vinsertb 2, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_2_18: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-32-NEXT: 
vinsertb 2, 3, 2 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_3_27(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-NEXT: vinsertb 2, 3, 12 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_3_27: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-BE-NEXT: vinsertb 2, 3, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_3_27: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-64-NEXT: vinsertb 2, 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_3_27: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-32-NEXT: vinsertb 2, 3, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_4_20(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-NEXT: vinsertb 2, 3, 11 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_4_20: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-BE-NEXT: vinsertb 2, 3, 4 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_4_20: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-64-NEXT: vinsertb 2, 3, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_4_20: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-32-NEXT: vinsertb 2, 3, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_5_29(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-NEXT: vinsertb 2, 3, 10 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_5_29: +; CHECK-BE: # %bb.0: # 
%entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-BE-NEXT: vinsertb 2, 3, 5 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_5_29: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-64-NEXT: vinsertb 2, 3, 5 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_5_29: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-32-NEXT: vinsertb 2, 3, 5 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_6_22(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-NEXT: vinsertb 2, 3, 9 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_6_22: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-BE-NEXT: vinsertb 2, 3, 6 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_6_22: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-64-NEXT: vinsertb 2, 3, 6 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_6_22: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-32-NEXT: vinsertb 2, 3, 6 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_7_31(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-NEXT: vinsertb 2, 3, 8 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_7_31: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-BE-NEXT: vinsertb 2, 3, 7 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_7_31: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-64-NEXT: vinsertb 2, 3, 7 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_7_31: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 8 +; 
CHECK-32-NEXT: vinsertb 2, 3, 7 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_8_24(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 3, 7 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_8_24: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-BE-NEXT: vinsertb 2, 3, 8 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_8_24: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-64-NEXT: vinsertb 2, 3, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_8_24: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-32-NEXT: vinsertb 2, 3, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_9_17(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-NEXT: vinsertb 2, 3, 6 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_9_17: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-BE-NEXT: vinsertb 2, 3, 9 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_9_17: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-64-NEXT: vinsertb 2, 3, 9 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_9_17: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-32-NEXT: vinsertb 2, 3, 9 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_10_26(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-NEXT: vinsertb 2, 3, 5 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_10_26: +; CHECK-BE: # %bb.0: # %entry +; 
CHECK-BE-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-BE-NEXT: vinsertb 2, 3, 10 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_10_26: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-64-NEXT: vinsertb 2, 3, 10 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_10_26: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-32-NEXT: vinsertb 2, 3, 10 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_11_19(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-NEXT: vinsertb 2, 3, 4 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_11_19: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-BE-NEXT: vinsertb 2, 3, 11 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_11_19: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-64-NEXT: vinsertb 2, 3, 11 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_11_19: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-32-NEXT: vinsertb 2, 3, 11 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_12_28(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-NEXT: vinsertb 2, 3, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_12_28: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-BE-NEXT: vinsertb 2, 3, 12 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_12_28: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-64-NEXT: vinsertb 2, 3, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_12_28: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 5 +; 
CHECK-32-NEXT: vinsertb 2, 3, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_13_21(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-NEXT: vinsertb 2, 3, 2 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_13_21: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-BE-NEXT: vinsertb 2, 3, 13 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_13_21: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-64-NEXT: vinsertb 2, 3, 13 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_13_21: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-32-NEXT: vinsertb 2, 3, 13 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_14_30(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-NEXT: vinsertb 2, 3, 1 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_14_30: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-BE-NEXT: vinsertb 2, 3, 14 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_14_30: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-64-NEXT: vinsertb 2, 3, 14 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_14_30: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-32-NEXT: vinsertb 2, 3, 14 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_15_23(<16 x i8> %a, <16 x i8> %b) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-NEXT: vinsertb 2, 3, 0 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: 
shuffle_vector_byte_15_23: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 3, 15 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_15_23: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 3, 15 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_15_23: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 3, 15 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +; The following testcases take one byte element from the first vector and +; insert it at various locations in the second vector. +define <16 x i8> @shuffle_vector_byte_16_8(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_16_8: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 1 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 0 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_16_8: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_16_8: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 1 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 0 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_16_8: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_17_1(<16 x i8> %a, <16 x i8> %b) { +; 
CHECK-64-OPT-LABEL: shuffle_vector_byte_17_1: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 1 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_17_1: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_17_1: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 1 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_17_1: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_18_10(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_18_10: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 3 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 2 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_18_10: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_18_10: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 3 +; 
CHECK-32-OPT-NEXT: vinsertb 3, 2, 2 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_18_10: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_19_3(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_19_3: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 3 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_19_3: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_19_3: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 3 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_19_3: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_20_12(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_20_12: +; CHECK-64-OPT: # %bb.0: # %entry +; 
CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 5 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 4 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_20_12: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_20_12: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 5 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 4 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_20_12: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_21_5(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_21_5: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 5 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_21_5: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_21_5: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 5 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; 
CHECK-32-O0-LABEL: shuffle_vector_byte_21_5: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_22_14(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_22_14: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 7 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 6 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_22_14: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_22_14: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 7 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 6 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_22_14: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_23_7(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_23_7: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 7 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; 
CHECK-64-O0-LABEL: shuffle_vector_byte_23_7: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_23_7: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 7 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_23_7: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_24_0(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_24_0: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 8 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_24_0: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_24_0: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 8 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_24_0: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 
3, 9 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_25_9(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_25_9: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 9 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_25_9: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_25_9: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 9 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_25_9: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_26_2(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_26_2: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 11 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 10 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_26_2: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; 
CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_26_2: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 11 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 10 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_26_2: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_27_11(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_27_11: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 11 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_27_11: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_27_11: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 11 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_27_11: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = 
shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_28_4(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_28_4: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 13 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 12 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_28_4: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_28_4: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 13 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 12 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_28_4: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_29_13(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_29_13: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 13 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_29_13: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 13 +; 
CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_29_13: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 13 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_29_13: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_30_6(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_30_6: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 15 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 14 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_30_6: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_30_6: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 15 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 14 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_30_6: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +define 
<16 x i8> @shuffle_vector_byte_31_15(<16 x i8> %a, <16 x i8> %b) { +; CHECK-64-OPT-LABEL: shuffle_vector_byte_31_15: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-64-OPT-NEXT: vinsertb 3, 2, 15 +; CHECK-64-OPT-NEXT: vmr 2, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: shuffle_vector_byte_31_15: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-64-O0-NEXT: vmr 3, 2 +; CHECK-64-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-64-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: shuffle_vector_byte_31_15: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-32-OPT-NEXT: vinsertb 3, 2, 15 +; CHECK-32-OPT-NEXT: vmr 2, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: shuffle_vector_byte_31_15: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-32-O0-NEXT: vmr 3, 2 +; CHECK-32-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-32-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %vecins +} + +; The following testcases use the same vector in both arguments of the +; shufflevector. 
If byte element 7 in BE mode (or 8 in LE mode) is the one +; we're attempting to insert, then we can use the vector insert instruction. +define <16 x i8> @shuffle_vector_byte_0_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI56_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI56_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_0_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 0 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_0_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_0_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_1_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 14 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_1_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI57_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI57_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_1_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C4(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_1_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C4(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_2_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 13 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_2_8: +; CHECK-BE: # %bb.0: # %entry +; 
CHECK-BE-NEXT: addis 3, 2, .LCPI58_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI58_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_2_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C5(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_2_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C5(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_3_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI59_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI59_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_3_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_3_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_3_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_4_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI60_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI60_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_4_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 4 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_4_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_4_7: +; 
CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_5_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 10 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_5_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI61_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI61_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_5_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C6(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_5_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C6(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_6_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 9 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_6_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI62_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI62_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_6_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C7(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_6_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C7(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> 
@shuffle_vector_byte_7_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 8 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_7_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI63_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI63_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_7_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C8(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_7_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C8(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_8_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI64_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI64_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_8_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 8 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_8_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_8_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_9_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI65_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI65_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_9_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 9 +; 
CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_9_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 9 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_9_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 9 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_10_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI66_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI66_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_10_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 10 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_10_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 10 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_10_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 10 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_11_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 4 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_11_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI67_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI67_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_11_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C9(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_11_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C9(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x 
i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_12_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_12_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI68_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI68_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_12_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C10(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_12_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C10(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_13_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI69_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI69_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_13_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 13 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_13_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 13 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_13_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 13 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_14_7(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis 3, 2, .LCPI70_0@toc@ha +; CHECK-NEXT: addi 3, 3, .LCPI70_0@toc@l +; CHECK-NEXT: lxvx 35, 0, 3 +; CHECK-NEXT: vperm 2, 2, 2, 3 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: 
shuffle_vector_byte_14_7: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: vinsertb 2, 2, 14 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_14_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vinsertb 2, 2, 14 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_14_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: vinsertb 2, 2, 14 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +define <16 x i8> @shuffle_vector_byte_15_8(<16 x i8> %a) { +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vinsertb 2, 2, 0 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: shuffle_vector_byte_15_8: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: addis 3, 2, .LCPI71_0@toc@ha +; CHECK-BE-NEXT: addi 3, 3, .LCPI71_0@toc@l +; CHECK-BE-NEXT: lxvx 35, 0, 3 +; CHECK-BE-NEXT: vperm 2, 2, 2, 3 +; CHECK-BE-NEXT: blr +; CHECK-64-LABEL: shuffle_vector_byte_15_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 3, L..C11(2) +; CHECK-64-NEXT: lxvx 35, 0, 3 +; CHECK-64-NEXT: vperm 2, 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: shuffle_vector_byte_15_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 3, L..C11(2) +; CHECK-32-NEXT: lxvx 35, 0, 3 +; CHECK-32-NEXT: vperm 2, 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> + ret <16 x i8> %vecins +} + +; The following tests try to insert one halfword element into the vector. We +; should always be using the 'vinserth' instruction. 
+define <8 x i16> @insert_halfword_0(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_0: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 0 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_0: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 0 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_0: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 0 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_0: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 0 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 0 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_1(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_1: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 2 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_1: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 2 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_1: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 2 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_1: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 2 +; 
CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 1 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_2(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_2: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 4 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_2: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 4 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_2: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 4 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_2: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 4 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 2 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_3(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_3: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 6 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_3: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 6 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_3: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 6 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_3: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; 
CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 6 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 3 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_4(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_4: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 8 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_4: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 8 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_4: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 8 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_4: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 8 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 4 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_5(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_5: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 10 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_5: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 10 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_5: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 10 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: 
insert_halfword_5: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 10 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 5 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_6(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_6: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 12 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_6: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 12 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_6: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 12 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_6: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 12 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 6 + ret <8 x i16> %vecins +} + +define <8 x i16> @insert_halfword_7(<8 x i16> %a, i16 %b) { +; CHECK-64-OPT-LABEL: insert_halfword_7: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinserth 2, 3, 14 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_halfword_7: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinserth 2, 3, 14 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_halfword_7: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: 
mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinserth 2, 3, 14 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_halfword_7: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinserth 2, 3, 14 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 7 + ret <8 x i16> %vecins +} + +; The following tests try to insert one byte element into the vector. We +; should always be using the 'vinsertb' instruction. +define <16 x i8> @insert_byte_0(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_0: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 0 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_0: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_0: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 0 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_0: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 0 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_1(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_1: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 1 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_1: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; 
CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_1: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 1 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_1: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 1 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_2(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_2: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 2 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_2: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_2: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 2 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_2: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 2 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_3(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_3: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 3 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_3: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed 
$r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_3: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 3 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_3: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 3 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_4(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_4: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 4 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_4: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_4: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 4 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_4: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 4 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_5(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_5: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 5 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_5: +; CHECK-64-O0: # %bb.0: 
# %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_5: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 5 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_5: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 5 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_6(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_6: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 6 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_6: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_6: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 6 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_6: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 6 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_7(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_7: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 7 +; CHECK-64-OPT-NEXT: blr +; +; 
CHECK-64-O0-LABEL: insert_byte_7: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_7: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 7 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_7: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 7 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_8(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_8: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 8 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_8: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_8: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 8 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_8: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 8 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_9(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_9: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; 
CHECK-64-OPT-NEXT: vinsertb 2, 3, 9 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_9: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_9: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 9 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_9: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 9 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_10(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_10: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 10 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_10: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_10: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 10 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_10: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 10 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_11(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_11: +; 
CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 11 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_11: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_11: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 11 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_11: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 11 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_12(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_12: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 12 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_12: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_12: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 12 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_12: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 12 + ret <16 x i8> %vecins +} + +define <16 x i8> 
@insert_byte_13(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_13: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 13 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_13: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_13: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 13 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_13: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 13 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_14(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_14: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 14 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_14: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_14: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 14 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_14: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement 
<16 x i8> %a, i8 %b, i32 14 + ret <16 x i8> %vecins +} + +define <16 x i8> @insert_byte_15(<16 x i8> %a, i8 %b) { +; CHECK-64-OPT-LABEL: insert_byte_15: +; CHECK-64-OPT: # %bb.0: # %entry +; CHECK-64-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-64-OPT-NEXT: vinsertb 2, 3, 15 +; CHECK-64-OPT-NEXT: blr +; +; CHECK-64-O0-LABEL: insert_byte_15: +; CHECK-64-O0: # %bb.0: # %entry +; CHECK-64-O0-NEXT: # kill: def $r3 killed $r3 killed $x3 +; CHECK-64-O0-NEXT: mtfprwz 0, 3 +; CHECK-64-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-64-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-64-O0-NEXT: blr +; +; CHECK-32-OPT-LABEL: insert_byte_15: +; CHECK-32-OPT: # %bb.0: # %entry +; CHECK-32-OPT-NEXT: mtvsrwz 35, 3 +; CHECK-32-OPT-NEXT: vinsertb 2, 3, 15 +; CHECK-32-OPT-NEXT: blr +; +; CHECK-32-O0-LABEL: insert_byte_15: +; CHECK-32-O0: # %bb.0: # %entry +; CHECK-32-O0-NEXT: # kill: def $r4 killed $r3 +; CHECK-32-O0-NEXT: mtfprwz 0, 3 +; CHECK-32-O0-NEXT: xscpsgndp 35, 0, 0 +; CHECK-32-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-32-O0-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 15 + ret <16 x i8> %vecins +} diff --git a/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-p9-xxinsertw-xxextractuw.ll @@ -0,0 +1,1584 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-32 + +define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: 
_Z7testInsILj0ELj0EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: 
_Z7testInsILj1ELj0EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector 
<4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: 
_Z7testInsILj2ELj3EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +; 
CHECK-64-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> 
%a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; 
CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; 
CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x 
i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define float @_Z13testUiToFpExtILj0EEfDv4_j(<4 x i32> %a) { +; CHECK-64-LABEL: _Z13testUiToFpExtILj0EEfDv4_j: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 0 +; CHECK-64-NEXT: xscvuxdsp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z13testUiToFpExtILj0EEfDv4_j: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -32(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxdsp 1, 0 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj1EEfDv4_j(<4 x i32> %a) { +; CHECK-64-LABEL: _Z13testUiToFpExtILj1EEfDv4_j: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 4 +; CHECK-64-NEXT: xscvuxdsp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z13testUiToFpExtILj1EEfDv4_j: +; CHECK-32: # %bb.0: # 
%entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -28(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxdsp 1, 0 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj2EEfDv4_j(<4 x i32> %a) { +; CHECK-64-LABEL: _Z13testUiToFpExtILj2EEfDv4_j: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 8 +; CHECK-64-NEXT: xscvuxdsp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z13testUiToFpExtILj2EEfDv4_j: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -24(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxdsp 1, 0 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj3EEfDv4_j(<4 x i32> %a) { +; CHECK-64-LABEL: _Z13testUiToFpExtILj3EEfDv4_j: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 12 +; CHECK-64-NEXT: xscvuxdsp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z13testUiToFpExtILj3EEfDv4_j: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -20(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxdsp 1, 0 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 3 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +; Verify we generate optimal code for unsigned vector int elem extract followed +; by conversion to double + +define double @conv2dlbTestui0(<4 x i32> %a) { +; CHECK-64-LABEL: conv2dlbTestui0: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 0 +; CHECK-64-NEXT: xscvuxddp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: 
conv2dlbTestui0: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -32(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxddp 1, 0 +; CHECK-32-NEXT: blr +entry: + %0 = extractelement <4 x i32> %a, i32 0 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui1(<4 x i32> %a) { +; CHECK-64-LABEL: conv2dlbTestui1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 4 +; CHECK-64-NEXT: xscvuxddp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: conv2dlbTestui1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -28(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxddp 1, 0 +; CHECK-32-NEXT: blr +entry: + %0 = extractelement <4 x i32> %a, i32 1 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui2(<4 x i32> %a) { +; CHECK-64-LABEL: conv2dlbTestui2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 8 +; CHECK-64-NEXT: xscvuxddp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: conv2dlbTestui2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -24(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxddp 1, 0 +; CHECK-32-NEXT: blr +entry: + %0 = extractelement <4 x i32> %a, i32 2 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui3(<4 x i32> %a) { +; CHECK-64-LABEL: conv2dlbTestui3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 12 +; CHECK-64-NEXT: xscvuxddp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: conv2dlbTestui3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwz 3, -20(1) +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 
0, 3 +; CHECK-32-NEXT: xscvuxddp 1, 0 +; CHECK-32-NEXT: blr +entry: + %0 = extractelement <4 x i32> %a, i32 3 + %1 = uitofp i32 %0 to double + ret double %1 +} + +; verify we don't crash for variable elem extract +define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { +; CHECK-64-LABEL: conv2dlbTestuiVar: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xscvuxddp 1, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: conv2dlbTestuiVar: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -32 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: lwzx 3, 4, 3 +; CHECK-32-NEXT: stw 3, -4(1) +; CHECK-32-NEXT: addi 3, 1, -4 +; CHECK-32-NEXT: lfiwzx 0, 0, 3 +; CHECK-32-NEXT: xscvuxddp 1, 0 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %elem + %conv = uitofp i32 %vecext to double + ret double %conv +} + +define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +; CHECK-64-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xscvdpspn 0, 1 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xscvdpspn 0, 1 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 0 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +; CHECK-64-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xscvdpspn 0, 1 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: 
_Z10testInsEltILj1EDv4_ffET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xscvdpspn 0, 1 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 1 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +; CHECK-64-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xscvdpspn 0, 1 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xscvdpspn 0, 1 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 2 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +; CHECK-64-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xscvdpspn 0, 1 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xscvdpspn 0, 1 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 3 + ret <4 x float> %vecins +} + +define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +; CHECK-64-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = 
insertelement <4 x i32> %a, i32 %b, i32 0 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +; CHECK-64-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x i32> %a, i32 %b, i32 1 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +; CHECK-64-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x i32> %a, i32 %b, i32 2 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +; CHECK-64-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x i32> %a, i32 %b, i32 3 + ret <4 x i32> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: 
_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; 
CHECK-64-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr 
+entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; 
CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> 
@_Z7testInsILj3ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: 
xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr 
+; +; CHECK-32-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: 
_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x 
i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxsldwi 0, 35, 35, 1 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +; CHECK-64-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_r: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 0, 35 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_r: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 0, 35 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} +define <4 x float> @testSameVecEl0BE(<4 x float> %a) { +; CHECK-64-LABEL: testSameVecEl0BE: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 34, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testSameVecEl0BE: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 34, 0 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl2BE(<4 x float> %a) { +; CHECK-64-LABEL: testSameVecEl2BE: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 34, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testSameVecEl2BE: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 34, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} 
+define <4 x float> @testSameVecEl3BE(<4 x float> %a) { +; CHECK-64-LABEL: testSameVecEl3BE: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 34, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testSameVecEl3BE: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 34, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl0LE(<4 x float> %a) { +; CHECK-64-LABEL: testSameVecEl0LE: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxspltw 0, 34, 2 +; CHECK-64-NEXT: xxsldwi 0, 34, 0, 1 +; CHECK-64-NEXT: xxsldwi 34, 0, 0, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testSameVecEl0LE: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxspltw 0, 34, 2 +; CHECK-32-NEXT: xxsldwi 0, 34, 0, 1 +; CHECK-32-NEXT: xxsldwi 34, 0, 0, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl1LE(<4 x float> %a) { +; CHECK-64-LABEL: testSameVecEl1LE: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxswapd 35, 34 +; CHECK-64-NEXT: vmrghw 2, 2, 3 +; CHECK-64-NEXT: vmrghw 2, 2, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testSameVecEl1LE: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxswapd 35, 34 +; CHECK-32-NEXT: vmrghw 2, 2, 3 +; CHECK-32-NEXT: vmrghw 2, 2, 3 +; CHECK-32-NEXT: blr +entry: + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl3LE(<4 x float> %a) { +; CHECK-64-LABEL: testSameVecEl3LE: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxspltw 0, 34, 2 +; CHECK-64-NEXT: xxswapd 1, 34 +; CHECK-64-NEXT: xxsldwi 34, 1, 0, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testSameVecEl3LE: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxspltw 0, 34, 2 +; CHECK-32-NEXT: xxswapd 1, 34 +; CHECK-32-NEXT: xxsldwi 34, 1, 0, 2 +; CHECK-32-NEXT: blr +entry: + %vecins = 
shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { +; CHECK-64-LABEL: insertVarF: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 4, 1, -16 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stfsx 1, 4, 3 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: insertVarF: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stfsx 1, 4, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %f, i32 %el + ret <4 x float> %vecins +} +define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) { +; CHECK-64-LABEL: insertVarI: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: insertVarI: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stwx 3, 5, 4 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x i32> %a, i32 %i, i32 %el + ret <4 x i32> %vecins +} +define <4 x i32> @intrinsicInsertTest(<4 x i32> %a, <2 x i64> %b) { +; CHECK-64-LABEL: intrinsicInsertTest: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxinsertw 34, 35, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: intrinsicInsertTest: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxinsertw 34, 35, 3 +; CHECK-32-NEXT: blr +entry: + %ans = tail call <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32> %a, <2 x i64> %b, i32 3) + ret <4 x i32> %ans +} +declare <4 x i32> @llvm.ppc.vsx.xxinsertw(<4 x i32>, <2 x i64>, i32) +define <2 x i64> 
@intrinsicExtractTest(<2 x i64> %a) { +; CHECK-64-LABEL: intrinsicExtractTest: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xxextractuw 0, 34, 5 +; CHECK-64-NEXT: xxlor 34, 0, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: intrinsicExtractTest: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xxextractuw 0, 34, 5 +; CHECK-32-NEXT: xxlor 34, 0, 0 +; CHECK-32-NEXT: blr +entry: + %ans = tail call <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64> %a, i32 5) + ret <2 x i64> %ans +} +declare <2 x i64> @llvm.ppc.vsx.xxextractuw(<2 x i64>, i32) diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 + +define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { +; CHECK-64-LABEL: test1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + ret i8 %vecext +} + +define signext i8 @test2(<16 x i8> %a, i32 signext %index) { +; CHECK-64-LABEL: test2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: extsb 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 
3, 4, 3 +; CHECK-32-NEXT: extsb 3, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + ret i8 %vecext +} + +define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { +; CHECK-64-LABEL: test3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 48 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + ret i16 %vecext +} + +define signext i16 @test4(<8 x i16> %a, i32 signext %index) { +; CHECK-64-LABEL: test4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: extsh 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhax 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + ret i16 %vecext +} + +define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { +; CHECK-64-LABEL: test5: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test5: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + ret i32 %vecext +} + +define signext i32 @test6(<4 x i32> %a, i32 signext %index) { +; CHECK-64-LABEL: test6: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: extsw 3, 3 +; 
CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test6: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + ret i32 %vecext +} + +; Test with immediate index +define zeroext i8 @test7(<16 x i8> %a) { +; CHECK-64-LABEL: test7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 3, 1 +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbz 3, -15(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 1 + ret i8 %vecext +} + +define zeroext i16 @test8(<8 x i16> %a) { +; CHECK-64-LABEL: test8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 3, 2 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 48 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhz 3, -14(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 1 + ret i16 %vecext +} + +define zeroext i32 @test9(<4 x i32> %a) { +; CHECK-64-LABEL: test9: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: li 3, 12 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test9: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -4(1) +; CHECK-32-NEXT: blr + %vecext = extractelement <4 x i32> %a, i32 3 + ret i32 %vecext +} diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi 
-mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 + +define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { +; CHECK-64-LABEL: test_add1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: clrlwi 3, 3, 24 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + %conv = zext i8 %vecext to i32 + %conv1 = zext i8 %c to i32 + %add = add nuw nsw i32 %conv, %conv1 + %conv2 = trunc i32 %add to i8 + ret i8 %conv2 +} + +define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { +; CHECK-64-LABEL: test_add2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: extsb 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: extsb 3, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + %conv3 = zext i8 %vecext to i32 + %conv14 = zext i8 %c to i32 + %add = add nuw nsw i32 %conv3, %conv14 + %conv2 = trunc i32 %add to i8 + ret i8 %conv2 +} + +define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { +; CHECK-64-LABEL: test_add3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: clrldi 3, 3, 48 +; 
CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: clrlwi 3, 3, 16 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + %conv = zext i16 %vecext to i32 + %conv1 = zext i16 %c to i32 + %add = add nuw nsw i32 %conv, %conv1 + %conv2 = trunc i32 %add to i16 + ret i16 %conv2 +} + +define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) { +; CHECK-64-LABEL: test_add4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: extsh 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: extsh 3, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + %conv5 = zext i16 %vecext to i32 + %conv16 = zext i16 %c to i32 + %add = add nuw nsw i32 %conv5, %conv16 + %conv2 = trunc i32 %add to i16 + ret i16 %conv2 +} + +define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) { +; CHECK-64-LABEL: test_add5: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: clrldi 3, 3, 32 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add5: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + %add = add i32 %vecext, %c + ret i32 %add 
+} + +define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) { +; CHECK-64-LABEL: test_add6: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add6: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + %add = add nsw i32 %vecext, %c + ret i32 %add +} + +; When extracting word element 2 on LE, it's better to use mfvsrwz rather than vextuwrx +define zeroext i32 @test7(<4 x i32> %a) { +; CHECK-64-LABEL: test7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 3, 8 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -8(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + ret i32 %vecext +} + +define zeroext i32 @testadd_7(<4 x i32> %a, i32 zeroext %c) { +; CHECK-64-LABEL: testadd_7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 4, 8 +; CHECK-64-NEXT: vextuwlx 4, 4, 2 +; CHECK-64-NEXT: add 3, 4, 3 +; CHECK-64-NEXT: clrldi 3, 3, 32 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testadd_7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 4, -8(1) +; CHECK-32-NEXT: add 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + %add = add i32 %vecext, %c + ret i32 %add +} + +define signext i32 @test8(<4 x i32> %a) { +; CHECK-64-LABEL: test8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 3, 8 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test8: +; CHECK-32: # %bb.0: # %entry 
+; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -8(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + ret i32 %vecext +} + +define signext i32 @testadd_8(<4 x i32> %a, i32 signext %c) { +; CHECK-64-LABEL: testadd_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 4, 8 +; CHECK-64-NEXT: vextuwlx 4, 4, 2 +; CHECK-64-NEXT: add 3, 4, 3 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testadd_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 4, -8(1) +; CHECK-32-NEXT: add 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + %add = add nsw i32 %vecext, %c + ret i32 %add +} + +; When extracting word element 1 on BE, it's better to use mfvsrwz rather than vextuwlx +define signext i32 @test9(<4 x i32> %a) { +; CHECK-64-LABEL: test9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mfvsrwz 3, 34 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test9: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -12(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + ret i32 %vecext +} + +define signext i32 @testadd_9(<4 x i32> %a, i32 signext %c) { +; CHECK-64-LABEL: testadd_9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mfvsrwz 4, 34 +; CHECK-64-NEXT: add 3, 4, 3 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testadd_9: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 4, -12(1) +; CHECK-32-NEXT: add 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + %add = add nsw i32 %vecext, %c + ret i32 %add +} diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -0,0 +1,611 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 + +; Byte indexed + +define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testByte: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: clrldi 4, 4, 60 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stbx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testByte: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 6, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stbx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i8 + %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx + ret <16 x i8> %vecins +} + +; Halfword indexed + +define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testHalf: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 1, 28, 30 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: sthx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testHalf: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 6, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: sthx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i16 + %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx + ret <8 x i16> %vecins +} + +; Word indexed + +define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testWord: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: stxv 34, -16(1) +; 
CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testWord: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 6, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stwx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i32 + %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx + ret <4 x i32> %vecins +} + +; Word immediate + +define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) { +; CHECK-64-LABEL: testWordImm: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testWordImm: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i32 + %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 + %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3 + ret <4 x i32> %vecins2 +} + +; Doubleword indexed + +define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testDoubleword: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleword: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: add 5, 6, 6 +; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 29 +; CHECK-32-NEXT: stwx 3, 7, 6 +; CHECK-32-NEXT: addi 3, 5, 1 +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stwx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x 
i64> %a, i64 %b, i64 %idx + ret <2 x i64> %vecins +} + +; Doubleword immediate + +define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) { +; CHECK-64-LABEL: testDoublewordImm: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprd 0, 3 +; CHECK-64-NEXT: xxmrghd 34, 34, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoublewordImm: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: mtfprwz 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x i64> %a, i64 %b, i32 1 + ret <2 x i64> %vecins +} + +define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) { +; CHECK-64-LABEL: testDoublewordImm2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprd 0, 3 +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoublewordImm2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: mtfprwz 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x i64> %a, i64 %b, i32 0 + ret <2 x i64> %vecins +} + +; Float indexed + +define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { +; CHECK-64-LABEL: testFloat1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 4, 1, -16 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stfsx 1, 4, 3 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloat1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stfsx 1, 4, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 + ret <4 x float> %vecins +} + +define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: 
testFloat2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lwz 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: lwz 3, 1(3) +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloat2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 6, 0(3) +; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: rlwinm 5, 5, 2, 28, 29 +; CHECK-32-NEXT: stwx 6, 7, 4 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lwz 3, 1(3) +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stwx 3, 4, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i8* %b to float* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 + %1 = bitcast i8* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 %idx1 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2 + ret <4 x float> %vecins2 +} + +define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testFloat3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: lwzx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: lwzx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: 
blr +; +; CHECK-32-LABEL: testFloat3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 6, 1 +; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-NEXT: rlwinm 5, 5, 2, 28, 29 +; CHECK-32-NEXT: lwzx 6, 3, 6 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: stwx 6, 7, 4 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lwz 3, 0(3) +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stwx 3, 4, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 + %0 = bitcast i8* %add.ptr to float* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736 + %1 = bitcast i8* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 %idx1 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2 + ret <4 x float> %vecins2 +} + +; Float immediate + +define <4 x float> @testFloatImm1(<4 x float> %a, float %b) { +; CHECK-64-LABEL: testFloatImm1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xscvdpspn 0, 1 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloatImm1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xscvdpspn 0, 1 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 0 + %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2 + ret <4 x float> %vecins1 +} + +define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) { +; CHECK-64-LABEL: testFloatImm2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lfs 0, 0(3) +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: lfs 0, 4(3) +; CHECK-64-NEXT: xscvdpspn 
0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloatImm2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfs 0, 0(3) +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: lfs 0, 4(3) +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i32* %b to float* + %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1 + %1 = bitcast i32* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 0 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2 + ret <4 x float> %vecins2 +} + +define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) { +; CHECK-64-LABEL: testFloatImm3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 4, 4 +; CHECK-64-NEXT: lfsx 0, 3, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: lfsx 0, 3, 4 +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloatImm3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 4, 4 +; CHECK-32-NEXT: lfsx 0, 3, 4 +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: lfs 0, 0(3) +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 + %0 = bitcast i32* %add.ptr to float* + %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 68719476736 + %1 = bitcast i32* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + 
%vecins = insertelement <4 x float> %a, float %2, i32 0 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2 + ret <4 x float> %vecins2 +} + +; Double indexed + +define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { +; CHECK-64-LABEL: testDouble1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 4, 1, -16 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stfdx 1, 4, 3 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDouble1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stfdx 1, 4, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x double> %a, double %b, i32 %idx1 + ret <2 x double> %vecins +} + +define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testDouble2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDouble2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 0(3) +; CHECK-32-NEXT: addi 6, 1, -32 +; CHECK-32-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-NEXT: stfdx 0, 6, 4 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lfd 1, 1(3) +; CHECK-32-NEXT: addi 3, 1, -16 +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stfdx 1, 3, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; 
CHECK-32-NEXT: blr +entry: + %0 = bitcast i8* %b to double* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 + %1 = bitcast i8* %add.ptr1 to double* + %2 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %2, i32 %idx1 + %3 = load double, double* %1, align 8 + %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2 + ret <2 x double> %vecins2 +} + +define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testDouble3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: ldx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDouble3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 6, 1 +; CHECK-32-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-NEXT: lfdx 0, 3, 6 +; CHECK-32-NEXT: addi 6, 1, -32 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: stfdx 0, 6, 4 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lfd 1, 0(3) +; CHECK-32-NEXT: addi 3, 1, -16 +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stfdx 1, 3, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 + %0 = bitcast i8* %add.ptr to double* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736 + %1 = bitcast i8* %add.ptr1 to double* + %2 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %2, i32 %idx1 + %3 = load double, double* %1, align 8 + %vecins2 = insertelement <2 x double> 
%vecins, double %3, i32 %idx2 + ret <2 x double> %vecins2 +} + +; Double immediate + +define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) { +; CHECK-64-LABEL: testDoubleImm1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x double> %a, double %b, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lfd 0, 0(3) +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 0(3) +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i32* %b to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lfd 0, 4(3) +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 4(3) +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 1 + %0 = bitcast i32* %add.ptr to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 4, 4 +; CHECK-64-NEXT: lfdx 0, 3, 4 +; 
CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 4, 4 +; CHECK-32-NEXT: lfdx 0, 3, 4 +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 + %0 = bitcast i32* %add.ptr to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm5: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-NEXT: lfdx 0, 3, 4 +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm5: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 0(3) +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736 + %0 = bitcast i32* %add.ptr to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + diff --git a/llvm/test/CodeGen/PowerPC/vec-bswap.ll b/llvm/test/CodeGen/PowerPC/vec-bswap.ll --- a/llvm/test/CodeGen/PowerPC/vec-bswap.ll +++ b/llvm/test/CodeGen/PowerPC/vec-bswap.ll @@ -1,10 +1,25 @@ ; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \ ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s + +; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff -mcpu=pwr9 \ +; RUN: -verify-machineinstrs -vec-extabi | \ +; RUN: FileCheck %s --check-prefixes=AIX,AIX64 +; RUN: llc < %s -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 \ +; RUN: -verify-machineinstrs -vec-extabi | \ +; RUN: FileCheck %s --check-prefixes=AIX,AIX32 + define dso_local void @test(i32* %Arr, i32 signext %Len) { ; CHECK-LABEL: test: ; CHECK: lxvx [[REG:vs[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NOT: [[REG]] ; CHECK: 
xxbrw vs{{[0-9]+}}, [[REG]] + +; AIX-LABEL: test: +; AIX64: lxvx [[REG64:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} +; AIX32: lxv [[REG32:[0-9]+]], {{[0-9]+}}({{[0-9]+}}) +; AIX64-NOT: [[REG64]] +; AIX64: xxbrw {{[0-9]+}}, [[REG64]] +; AIX32: xxbrw {{[0-9]+}}, [[REG32]] entry: %cmp1 = icmp slt i32 0, %Len br i1 %cmp1, label %for.body.lr.ph, label %for.cond.cleanup @@ -77,6 +92,10 @@ ; CHECK-LABEL: test_halfword: ; CHECK: xxbrh vs34, vs34 ; CHECK-NEXT: blr + +; AIX-LABEL: test_halfword: +; AIX: xxbrh 34, 34 +; AIX-NEXT: blr entry: %0 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a) ret <8 x i16> %0 @@ -86,6 +105,10 @@ ; CHECK-LABEL: test_doubleword: ; CHECK: xxbrd vs34, vs34 ; CHECK-NEXT: blr + +; AIX-LABEL: test_doubleword: +; AIX: xxbrd 34, 34 +; AIX-NEXT: blr entry: %0 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a) ret <2 x i64> %0 @@ -95,6 +118,10 @@ ; CHECK-LABEL: test_quadword: ; CHECK: xxbrq vs34, vs34 ; CHECK-NEXT: blr + +; AIX-LABEL: test_quadword: +; AIX: xxbrq 34, 34 +; AIX-NEXT: blr entry: %0 = call <1 x i128> @llvm.bswap.v1i128(<1 x i128> %a) ret <1 x i128> %0