diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -3955,8 +3955,8 @@
           (v4i32 (LXVWSX xoaddr:$A))>;
 } // HasVSX, HasP9Vector
 
-// Big endian 64Bit Power9 subtarget.
-let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in {
+// Big endian Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
           (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
 def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
@@ -4042,6 +4042,10 @@
 def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
           (STXSIHXv (COPY_TO_REGCLASS (v16i8 (VSLDOI $S, $S, 8)), VSRC),
           xoaddr:$dst)>;
+} // HasVSX, HasP9Vector, IsBigEndian
+
+// Big endian 64Bit Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in {
 def : Pat<(v2i64 (scalar_to_vector (i64 (load iaddrX4:$src)))),
           (v2i64 (COPY_TO_REGCLASS (DFLOADf64 iaddrX4:$src), VSRC))>;
 def : Pat<(v2i64 (scalar_to_vector (i64 (load xaddrX4:$src)))),
diff --git a/llvm/test/CodeGen/PowerPC/aix-insert-extract.ll b/llvm/test/CodeGen/PowerPC/aix-insert-extract.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-insert-extract.ll
@@ -0,0 +1,808 @@
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-64
+; RUN: llc -mcpu=pwr9 -mtriple=powerpc-ibm-aix-xcoff -vec-extabi \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-32
+
+define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 5, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 1
+; CHECK: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 6, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
+; CHECK: xxswapd 0, 35
+; CHECK: xxinsertw 34, 0, 0
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
+; CHECK: xxsldwi 0, 35, 35, 3
+; CHECK: xxinsertw 34, 0, 4
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
+entry:
+; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_
+; CHECK-NOT: xxsldwi
+; CHECK: xxinsertw 34, 35, 4
+  %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+  ret <4 x float> %vecins
+}
+
+define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_(<4 
x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x 
i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> 
@_Z7testInsILj3ELj2EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %vecins +} + +define float @_Z13testUiToFpExtILj0EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj0EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 0 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj0EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 0 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj1EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj1EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 4 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj1EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 1 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj2EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj2EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 8 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj2EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 2 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +define float @_Z13testUiToFpExtILj3EEfDv4_j(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: _Z13testUiToFpExtILj3EEfDv4_j +; CHECK-64: xxextractuw 0, 34, 12 +; CHECK-64: xscvuxdsp 1, 0 +; CHECK-32-LABEL: _Z13testUiToFpExtILj3EEfDv4_j +; CHECK-32: lfiwzx 0, 0, 3 +; CHECK-32: xscvuxdsp 1, 0 + %vecext = extractelement <4 x i32> %a, i32 3 + %conv = uitofp i32 %vecext to float + ret float %conv +} + +; Verify we generate optimal code for unsigned vector int elem extract followed +; by conversion to double + +define double @conv2dlbTestui0(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui0 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 0 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui0 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 0 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui1(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui1 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 4 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui1 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 1 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui2(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui2 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 8 +; CHECK-64: xscvuxddp 1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui2 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 2 + %1 = uitofp i32 %0 to double + ret double %1 +} + +define double @conv2dlbTestui3(<4 x i32> %a) { +entry: +; CHECK-64-LABEL: conv2dlbTestui3 +; CHECK-64: xxextractuw [[CP64:[0-9]+]], 34, 12 +; CHECK-64: xscvuxddp 
1, [[CP64]] +; CHECK-32-LABEL: conv2dlbTestui3 +; CHECK-32: lfiwzx [[CP32:[0-9]+]], 0, 3 +; CHECK-32: xscvuxddp 1, [[CP32]] + %0 = extractelement <4 x i32> %a, i32 3 + %1 = uitofp i32 %0 to double + ret double %1 +} + +; verify we don't crash for variable elem extract +define double @conv2dlbTestuiVar(<4 x i32> %a, i32 zeroext %elem) { +entry: + %vecext = extractelement <4 x i32> %a, i32 %elem + %conv = uitofp i32 %vecext to double + ret double %conv +} + +define <4 x float> @_Z10testInsEltILj0EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj0EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = insertelement <4 x float> %a, float %b, i32 0 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj1EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj1EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = insertelement <4 x float> %a, float %b, i32 1 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj2EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj2EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = insertelement <4 x float> %a, float %b, i32 2 + ret <4 x float> %vecins +} + +define <4 x float> @_Z10testInsEltILj3EDv4_ffET0_S1_T1_(<4 x float> %a, float %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj3EDv4_ffET0_S1_T1_ +; CHECK: xscvdpspn 0, 1 +; CHECK: xxsldwi 0, 0, 0, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = insertelement <4 x float> %a, float %b, i32 3 + ret <4 x float> %vecins +} + +define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 0 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 1 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 2 + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { +entry: +; CHECK-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_ +; CHECK: mtfprwz 0, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = insertelement <4 x i32> %a, i32 %b, i32 3 + ret <4 x i32> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> 
%b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj0ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj1ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj2ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj1EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_fET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret 
<4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj2EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_fET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x float> @_Z7testInsILj3ELj3EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj0ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 0 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj1ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 4 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x 
i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj2ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 8 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 3 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj1EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj1EDv4_jET1_S1_S1_ +; CHECK-NOT: xxsldwi +; CHECK: xxinsertw 34, 35, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj2EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj2EDv4_jET1_S1_S1_ +; CHECK: xxsldwi 0, 35, 35, 1 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} + +define <4 x i32> @_Z7testInsILj3ELj3EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) { +entry: +; CHECK-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_ +; CHECK: xxswapd 0, 35 +; CHECK: xxinsertw 34, 0, 12 + %vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> + ret <4 x i32> %vecins +} +define <4 x float> @testSameVecEl0BE(<4 x float> %a) { +entry: +; CHECK-LABEL: testSameVecEl0BE +; CHECK: xxinsertw 34, 34, 0 + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl2BE(<4 x float> %a) { +entry: +; CHECK-LABEL: testSameVecEl2BE +; CHECK: xxinsertw 34, 34, 8 + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @testSameVecEl3BE(<4 x float> %a) { +entry: +; CHECK-LABEL: testSameVecEl3BE +; CHECK: xxinsertw 34, 34, 12 + %vecins = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> + ret <4 x float> %vecins +} +define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { +entry: +; CHECK-LABEL: insertVarF +; CHECK: stfsx 1, +; CHECK: lxv + %vecins = insertelement <4 x float> %a, float %f, i32 %el + ret <4 x float> %vecins +} +define <4 x i32> @insertVarI(<4 x i32> %a, i32 %i, i32 %el) { +entry: +; CHECK-LABEL: insertVarI +; CHECK: stwx +; CHECK: lxv + %vecins = insertelement <4 x i32> %a, i32 %i, i32 %el + ret <4 x i32> %vecins +} diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 + +define zeroext i8 @test1(<16 x i8> %a, i32 signext %index) { +; CHECK-64-LABEL: test1: +; CHECK-64: # 
%bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + ret i8 %vecext +} + +define signext i8 @test2(<16 x i8> %a, i32 signext %index) { +; CHECK-64-LABEL: test2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: extsb 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 4, 3 +; CHECK-32-NEXT: extsb 3, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + ret i8 %vecext +} + +define zeroext i16 @test3(<8 x i16> %a, i32 signext %index) { +; CHECK-64-LABEL: test3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 48 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + ret i16 %vecext +} + +define signext i16 @test4(<8 x i16> %a, i32 signext %index) { +; CHECK-64-LABEL: test4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: extsh 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhax 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 %index + ret i16 %vecext +} + +define zeroext i32 @test5(<4 x i32> %a, i32 signext %index) { +; CHECK-64-LABEL: test5: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test5: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + ret i32 %vecext +} + +define signext i32 @test6(<4 x i32> %a, i32 signext %index) { +; CHECK-64-LABEL: test6: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test6: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwzx 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 %index + ret i32 %vecext +} + +; Test with immediate index +define zeroext i8 @test7(<16 x i8> %a) { +; CHECK-64-LABEL: test7: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 3, 1 +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test7: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbz 3, -15(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, 
i32 1 + ret i8 %vecext +} + +define zeroext i16 @test8(<8 x i16> %a) { +; CHECK-64-LABEL: test8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 3, 2 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: clrldi 3, 3, 48 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhz 3, -14(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <8 x i16> %a, i32 1 + ret i16 %vecext +} + +define zeroext i32 @test9(<4 x i32> %a) { +; CHECK-64-LABEL: test9: +; CHECK-64: # %bb.0: +; CHECK-64-NEXT: li 3, 12 +; CHECK-64-NEXT: vextuwlx 3, 3, 2 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test9: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -4(1) +; CHECK-32-NEXT: blr + %vecext = extractelement <4 x i32> %a, i32 3 + ret i32 %vecext +} diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll @@ -0,0 +1,271 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 + +define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) { +; CHECK-64-LABEL: test_add1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: clrldi 3, 3, 56 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: clrlwi 3, 3, 24 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + %conv = zext i8 %vecext to i32 + %conv1 = zext i8 %c to i32 + %add = add nuw nsw i32 %conv, %conv1 + %conv2 = trunc i32 %add to i8 + ret i8 %conv2 +} + +define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) { +; CHECK-64-LABEL: test_add2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: vextublx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: extsb 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 3, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lbzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: extsb 3, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <16 x i8> %a, i32 %index + %conv3 = zext i8 %vecext to i32 + %conv14 = zext i8 %c to i32 + %add = add nuw nsw i32 %conv3, %conv14 + %conv2 = trunc i32 %add to i8 + ret i8 %conv2 +} + +define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) { +; CHECK-64-LABEL: test_add3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-64-NEXT: vextuhlx 3, 3, 2 +; CHECK-64-NEXT: add 3, 3, 4 +; CHECK-64-NEXT: clrldi 3, 3, 48 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test_add3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lhzx 3, 5, 3 +; CHECK-32-NEXT: add 3, 3, 4 +; CHECK-32-NEXT: clrlwi 3, 3, 16 +; CHECK-32-NEXT: blr +entry: + %vecext = 
extractelement <8 x i16> %a, i32 %index
+  %conv = zext i16 %vecext to i32
+  %conv1 = zext i16 %c to i32
+  %add = add nuw nsw i32 %conv, %conv1
+  %conv2 = trunc i32 %add to i16
+  ret i16 %conv2
+}
+
+define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
+; CHECK-64-LABEL: test_add4:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: rlwinm 3, 3, 1, 28, 30
+; CHECK-64-NEXT: vextuhlx 3, 3, 2
+; CHECK-64-NEXT: add 3, 3, 4
+; CHECK-64-NEXT: extsh 3, 3
+; CHECK-64-NEXT: blr
+;
+; CHECK-32-LABEL: test_add4:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: addi 5, 1, -16
+; CHECK-32-NEXT: rlwinm 3, 3, 1, 28, 30
+; CHECK-32-NEXT: stxv 34, -16(1)
+; CHECK-32-NEXT: lhzx 3, 5, 3
+; CHECK-32-NEXT: add 3, 3, 4
+; CHECK-32-NEXT: extsh 3, 3
+; CHECK-32-NEXT: blr
+entry:
+  %vecext = extractelement <8 x i16> %a, i32 %index
+  %conv5 = zext i16 %vecext to i32
+  %conv16 = zext i16 %c to i32
+  %add = add nuw nsw i32 %conv5, %conv16
+  %conv2 = trunc i32 %add to i16
+  ret i16 %conv2
+}
+
+define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
+; CHECK-64-LABEL: test_add5:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
+; CHECK-64-NEXT: vextuwlx 3, 3, 2
+; CHECK-64-NEXT: add 3, 3, 4
+; CHECK-64-NEXT: clrldi 3, 3, 32
+; CHECK-64-NEXT: blr
+;
+; CHECK-32-LABEL: test_add5:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: addi 5, 1, -16
+; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
+; CHECK-32-NEXT: stxv 34, -16(1)
+; CHECK-32-NEXT: lwzx 3, 5, 3
+; CHECK-32-NEXT: add 3, 3, 4
+; CHECK-32-NEXT: blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %index
+  %add = add i32 %vecext, %c
+  ret i32 %add
+}
+
+define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
+; CHECK-64-LABEL: test_add6:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: rlwinm 3, 3, 2, 28, 29
+; CHECK-64-NEXT: vextuwlx 3, 3, 2
+; CHECK-64-NEXT: add 3, 3, 4
+; CHECK-64-NEXT: extsw 3, 3
+; CHECK-64-NEXT: blr
+;
+; CHECK-32-LABEL: test_add6:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: addi 5, 1, -16
+; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
+; CHECK-32-NEXT: stxv 34, -16(1)
+; CHECK-32-NEXT: lwzx 3, 5, 3
+; CHECK-32-NEXT: add 3, 3, 4
+; CHECK-32-NEXT: blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %index
+  %add = add nsw i32 %vecext, %c
+  ret i32 %add
+}
+
+; When extracting word element 2 on BE, vextuwlx is used; mfvsrwz can only extract word element 1.
+define zeroext i32 @test7(<4 x i32> %a) {
+; CHECK-64-LABEL: test7:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: li 3, 8
+; CHECK-64-NEXT: vextuwlx 3, 3, 2
+; CHECK-64-NEXT: blr
+;
+; CHECK-32-LABEL: test7:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: stxv 34, -16(1)
+; CHECK-32-NEXT: lwz 3, -8(1)
+; CHECK-32-NEXT: blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 2
+  ret i32 %vecext
+}
+
+define zeroext i32 @testadd_7(<4 x i32> %a, i32 zeroext %c) {
+; CHECK-64-LABEL: testadd_7:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: li 4, 8
+; CHECK-64-NEXT: vextuwlx 4, 4, 2
+; CHECK-64-NEXT: add 3, 4, 3
+; CHECK-64-NEXT: clrldi 3, 3, 32
+; CHECK-64-NEXT: blr
+;
+; CHECK-32-LABEL: testadd_7:
+; CHECK-32: # %bb.0: # %entry
+; CHECK-32-NEXT: stxv 34, -16(1)
+; CHECK-32-NEXT: lwz 4, -8(1)
+; CHECK-32-NEXT: add 3, 4, 3
+; CHECK-32-NEXT: blr
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 2
+  %add = add i32 %vecext, %c
+  ret i32 %add
+}
+
+define signext i32 @test8(<4 x i32> %a) {
+; CHECK-64-LABEL: test8:
+; CHECK-64: # %bb.0: # %entry
+; CHECK-64-NEXT: li 3, 8
+; CHECK-64-NEXT: vextuwlx 3, 
3, 2 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -8(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + ret i32 %vecext +} + +define signext i32 @testadd_8(<4 x i32> %a, i32 signext %c) { +; CHECK-64-LABEL: testadd_8: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 4, 8 +; CHECK-64-NEXT: vextuwlx 4, 4, 2 +; CHECK-64-NEXT: add 3, 4, 3 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testadd_8: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 4, -8(1) +; CHECK-32-NEXT: add 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 2 + %add = add nsw i32 %vecext, %c + ret i32 %add +} + +; When extracting word element 1 on BE, it's better to use mfvsrwz rather than vextuwlx +define signext i32 @test9(<4 x i32> %a) { +; CHECK-64-LABEL: test9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mfvsrwz 3, 34 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: test9: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 3, -12(1) +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + ret i32 %vecext +} + +define signext i32 @testadd_9(<4 x i32> %a, i32 signext %c) { +; CHECK-64-LABEL: testadd_9: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mfvsrwz 4, 34 +; CHECK-64-NEXT: add 3, 4, 3 +; CHECK-64-NEXT: extsw 3, 3 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testadd_9: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: lwz 4, -12(1) +; CHECK-32-NEXT: add 3, 4, 3 +; CHECK-32-NEXT: blr +entry: + %vecext = extractelement <4 x i32> %a, i32 1 + %add = add nsw i32 %vecext, %c + ret i32 %add +} diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert.ll @@ -0,0 +1,8 @@ +; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | grep sth + +define <8 x i16> @insert(<8 x i16> %foo, i16 %a) nounwind { +entry: + %vecext = insertelement <8 x i16> %foo, i16 %a, i32 7 ; [#uses=1] + ret <8 x i16> %vecext +} + diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll @@ -0,0 +1,611 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32 + +; Byte indexed + +define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testByte: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: clrldi 4, 4, 60 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stbx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testByte: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: clrlwi 3, 6, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stbx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i8 + %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx + ret <16 x 
i8> %vecins +} + +; Halfword indexed + +define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testHalf: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 1, 28, 30 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: sthx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testHalf: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 6, 1, 28, 30 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: sthx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i16 + %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx + ret <8 x i16> %vecins +} + +; Word indexed + +define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testWord: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testWord: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 6, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stwx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i32 + %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx + ret <4 x i32> %vecins +} + +; Word immediate + +define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) { +; CHECK-64-LABEL: testWordImm: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprwz 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 4 +; CHECK-64-NEXT: xxinsertw 34, 0, 12 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testWordImm: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %conv = trunc i64 %b to i32 + %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 + %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3 + ret <4 x i32> %vecins2 +} + +; Doubleword indexed + +define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) { +; CHECK-64-LABEL: testDoubleword: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleword: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: add 5, 6, 6 +; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 29 +; CHECK-32-NEXT: stwx 3, 7, 6 +; CHECK-32-NEXT: addi 3, 5, 1 +; CHECK-32-NEXT: addi 5, 1, -16 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29 +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stwx 4, 5, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx + ret <2 x i64> %vecins +} + +; Doubleword immediate + +define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) { +; CHECK-64-LABEL: testDoublewordImm: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprd 0, 3 +; CHECK-64-NEXT: xxmrghd 34, 34, 0 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoublewordImm: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: mtfprwz 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 12 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement 
<2 x i64> %a, i64 %b, i32 1 + ret <2 x i64> %vecins +} + +define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) { +; CHECK-64-LABEL: testDoublewordImm2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: mtfprd 0, 3 +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoublewordImm2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: mtfprwz 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: mtfprwz 0, 4 +; CHECK-32-NEXT: xxinsertw 34, 0, 4 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x i64> %a, i64 %b, i32 0 + ret <2 x i64> %vecins +} + +; Float indexed + +define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) { +; CHECK-64-LABEL: testFloat1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 4, 1, -16 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stfsx 1, 4, 3 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloat1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: rlwinm 3, 4, 2, 28, 29 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stfsx 1, 4, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 + ret <4 x float> %vecins +} + +define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testFloat2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lwz 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: lwz 3, 1(3) +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloat2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lwz 6, 0(3) +; CHECK-32-NEXT: addi 7, 1, -32 +; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: rlwinm 5, 5, 2, 28, 29 +; CHECK-32-NEXT: stwx 6, 7, 4 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lwz 3, 1(3) +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stwx 3, 4, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i8* %b to float* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 + %1 = bitcast i8* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 %idx1 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2 + ret <4 x float> %vecins2 +} + +define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testFloat3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: lwzx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stwx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: lwzx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stwx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloat3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 6, 1 +; CHECK-32-NEXT: addi 7, 
1, -32 +; CHECK-32-NEXT: rlwinm 4, 4, 2, 28, 29 +; CHECK-32-NEXT: rlwinm 5, 5, 2, 28, 29 +; CHECK-32-NEXT: lwzx 6, 3, 6 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: stwx 6, 7, 4 +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lwz 3, 0(3) +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stwx 3, 4, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 + %0 = bitcast i8* %add.ptr to float* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736 + %1 = bitcast i8* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 %idx1 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2 + ret <4 x float> %vecins2 +} + +; Float immediate + +define <4 x float> @testFloatImm1(<4 x float> %a, float %b) { +; CHECK-64-LABEL: testFloatImm1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: xscvdpspn 0, 1 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloatImm1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: xscvdpspn 0, 1 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <4 x float> %a, float %b, i32 0 + %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2 + ret <4 x float> %vecins1 +} + +define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) { +; CHECK-64-LABEL: testFloatImm2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lfs 0, 0(3) +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: lfs 0, 4(3) +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloatImm2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfs 0, 0(3) +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: lfs 0, 4(3) +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i32* %b to float* + %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1 + %1 = bitcast i32* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 0 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2 + ret <4 x float> %vecins2 +} + +define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) { +; CHECK-64-LABEL: testFloatImm3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 4, 4 +; CHECK-64-NEXT: lfsx 0, 3, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 0 +; CHECK-64-NEXT: lfsx 0, 3, 4 +; CHECK-64-NEXT: xscvdpspn 0, 0 +; CHECK-64-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-64-NEXT: xxinsertw 34, 0, 8 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testFloatImm3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 4, 4 +; CHECK-32-NEXT: lfsx 0, 3, 4 +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 0 +; CHECK-32-NEXT: lfs 0, 0(3) +; CHECK-32-NEXT: xscvdpspn 0, 0 +; CHECK-32-NEXT: xxsldwi 0, 0, 
0, 3 +; CHECK-32-NEXT: xxinsertw 34, 0, 8 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 + %0 = bitcast i32* %add.ptr to float* + %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 68719476736 + %1 = bitcast i32* %add.ptr1 to float* + %2 = load float, float* %0, align 4 + %vecins = insertelement <4 x float> %a, float %2, i32 0 + %3 = load float, float* %1, align 4 + %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2 + ret <4 x float> %vecins2 +} + +; Double indexed + +define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) { +; CHECK-64-LABEL: testDouble1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: rlwinm 3, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 4, 1, -16 +; CHECK-64-NEXT: stxv 34, -16(1) +; CHECK-64-NEXT: stfdx 1, 4, 3 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDouble1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: addi 4, 1, -16 +; CHECK-32-NEXT: rlwinm 3, 5, 3, 28, 28 +; CHECK-32-NEXT: stxv 34, -16(1) +; CHECK-32-NEXT: stfdx 1, 4, 3 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x double> %a, double %b, i32 %idx1 + ret <2 x double> %vecins +} + +define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testDouble2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: ld 6, 0(3) +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDouble2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 0(3) +; CHECK-32-NEXT: addi 6, 1, -32 +; CHECK-32-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: rlwinm 5, 5, 3, 28, 28 +; CHECK-32-NEXT: stfdx 0, 6, 4 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lfd 1, 1(3) +; CHECK-32-NEXT: addi 3, 1, -16 +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stfdx 1, 3, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i8* %b to double* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 + %1 = bitcast i8* %add.ptr1 to double* + %2 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %2, i32 %idx1 + %3 = load double, double* %1, align 8 + %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2 + ret <2 x double> %vecins2 +} + +define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) { +; CHECK-64-LABEL: testDouble3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 6, 1 +; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-64-NEXT: addi 7, 1, -32 +; CHECK-64-NEXT: ldx 6, 3, 6 +; CHECK-64-NEXT: stxv 34, -32(1) +; CHECK-64-NEXT: stdx 6, 7, 4 +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: lxv 0, -32(1) +; CHECK-64-NEXT: rldic 4, 4, 36, 27 +; CHECK-64-NEXT: ldx 3, 3, 4 +; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28 +; CHECK-64-NEXT: addi 5, 1, -16 +; CHECK-64-NEXT: stxv 0, -16(1) +; CHECK-64-NEXT: stdx 3, 5, 4 +; CHECK-64-NEXT: lxv 34, -16(1) +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDouble3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 6, 1 +; CHECK-32-NEXT: rlwinm 4, 4, 3, 28, 28 +; CHECK-32-NEXT: rlwinm 
5, 5, 3, 28, 28 +; CHECK-32-NEXT: lfdx 0, 3, 6 +; CHECK-32-NEXT: addi 6, 1, -32 +; CHECK-32-NEXT: stxv 34, -32(1) +; CHECK-32-NEXT: stfdx 0, 6, 4 +; CHECK-32-NEXT: lxv 0, -32(1) +; CHECK-32-NEXT: lfd 1, 0(3) +; CHECK-32-NEXT: addi 3, 1, -16 +; CHECK-32-NEXT: stxv 0, -16(1) +; CHECK-32-NEXT: stfdx 1, 3, 5 +; CHECK-32-NEXT: lxv 34, -16(1) +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 + %0 = bitcast i8* %add.ptr to double* + %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736 + %1 = bitcast i8* %add.ptr1 to double* + %2 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %2, i32 %idx1 + %3 = load double, double* %1, align 8 + %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2 + ret <2 x double> %vecins2 +} + +; Double immediate + +define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) { +; CHECK-64-LABEL: testDoubleImm1: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-64-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm1: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1 +; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1 +; CHECK-32-NEXT: blr +entry: + %vecins = insertelement <2 x double> %a, double %b, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm2: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lfd 0, 0(3) +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm2: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 0(3) +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %0 = bitcast i32* %b to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm3: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lfd 0, 4(3) +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm3: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 4(3) +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 1 + %0 = bitcast i32* %add.ptr to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm4: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: lis 4, 4 +; CHECK-64-NEXT: lfdx 0, 3, 4 +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: testDoubleImm4: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lis 4, 4 +; CHECK-32-NEXT: lfdx 0, 3, 4 +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 + %0 = bitcast i32* %add.ptr to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} + +define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) { +; CHECK-64-LABEL: testDoubleImm5: +; CHECK-64: # %bb.0: # %entry +; CHECK-64-NEXT: li 4, 1 +; CHECK-64-NEXT: rldic 4, 4, 38, 25 +; CHECK-64-NEXT: lfdx 0, 3, 4 +; CHECK-64-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-64-NEXT: blr +; +; CHECK-32-LABEL: 
testDoubleImm5: +; CHECK-32: # %bb.0: # %entry +; CHECK-32-NEXT: lfd 0, 0(3) +; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1 +; CHECK-32-NEXT: blr +entry: + %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736 + %0 = bitcast i32* %add.ptr to double* + %1 = load double, double* %0, align 8 + %vecins = insertelement <2 x double> %a, double %1, i32 0 + ret <2 x double> %vecins +} +