diff --git a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll deleted file mode 100644 --- a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll +++ /dev/null @@ -1,138 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 - -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ -; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ -; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 - -define <16 x i8> @test_4_8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr { -; CHECK-LE-P8-LABEL: test_4_8: -; CHECK-LE-P8: # %bb.0: # %entry -; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 -; CHECK-LE-P8-NEXT: addi r3, r5, .LCPI0_0@toc@l -; CHECK-LE-P8-NEXT: lxvd2x vs2, 0, r3 -; CHECK-LE-P8-NEXT: xxswapd v2, f0 -; CHECK-LE-P8-NEXT: xxswapd v3, f1 -; CHECK-LE-P8-NEXT: xxswapd v4, vs2 -; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 -; CHECK-LE-P8-NEXT: blr -; -; CHECK-LE-P9-LABEL: test_4_8: -; CHECK-LE-P9: # %bb.0: # %entry -; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-LE-P9-NEXT: xxswapd v2, f0 -; CHECK-LE-P9-NEXT: lfd f0, 0(r4) -; CHECK-LE-P9-NEXT: lxv v4, 0(r3) -; CHECK-LE-P9-NEXT: xxswapd v3, f0 -; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 -; CHECK-LE-P9-NEXT: blr -; -; CHECK-BE-P8-LABEL: test_4_8: -; CHECK-BE-P8: # %bb.0: # %entry -; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha -; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 -; CHECK-BE-P8-NEXT: addi r3, r5, .LCPI0_0@toc@l -; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 -; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 -; CHECK-BE-P8-NEXT: blr -; -; CHECK-BE-P9-LABEL: test_4_8: -; CHECK-BE-P9: # %bb.0: # %entry -; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha -; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) -; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l -; CHECK-BE-P9-NEXT: lxv v4, 0(r3) -; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 -; CHECK-BE-P9-NEXT: blr -; -; CHECK-AIX-64-P8-LABEL: test_4_8: -; CHECK-AIX-64-P8: # %bb.0: # %entry -; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0 -; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 -; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 -; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-64-P8-NEXT: blr -; -; CHECK-AIX-64-P9-LABEL: test_4_8: -; CHECK-AIX-64-P9: # %bb.0: # %entry -; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 -; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0 -; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) -; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 -; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) -; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-64-P9-NEXT: blr -; -; CHECK-AIX-32-P8-LABEL: test_4_8: -; CHECK-AIX-32-P8: # %bb.0: # %entry -; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r4) -; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) -; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 -; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 -; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0 -; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 -; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 -; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0 -; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-32-P8-NEXT: blr -; -; CHECK-AIX-32-P9-LABEL: test_4_8: -; CHECK-AIX-32-P9: # %bb.0: # %entry -; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 -; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) -; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) -; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) -; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) -; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 -; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) -; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) -; CHECK-AIX-32-P9-NEXT: xxmrghw v3, vs1, vs0 -; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 -; CHECK-AIX-32-P9-NEXT: blr -entry: - %0 = load <4 x i8>, ptr %a, align 4 - %bc1 = bitcast <4 x i8> %0 to i32 - %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 - %1 = load <8 x i8>, ptr %b, align 8 - %bc2 = bitcast <8 x i8> %1 to i64 - %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 - %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> - %3 = bitcast <2 x i64> %vecinit6 to <16 x i8> - %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> - ret <16 x i8> %shuffle -} diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll @@ -0,0 +1,2090 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v16i8_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lbz r3, 0(r3) +; CHECK-LE-P8-NEXT: lbz r4, 0(r4) +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; CHECK-BE-P8-NEXT: lbz r4, 0(r4) +; CHECK-BE-P8-NEXT: lbz r3, 0(r3) +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI0_0@toc@ha +; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-BE-P9-NEXT: lxsibzx v4, 0, r3 +; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI0_0@toc@l +; CHECK-BE-P9-NEXT: lxv v2, 0(r5) +; CHECK-BE-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lbz r4, 0(r4) +; CHECK-AIX-64-P8-NEXT: lbz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: ld r5, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxsibzx v4, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r5) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lbz r4, 0(r4) +; CHECK-AIX-32-P8-NEXT: lbz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r5, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxsibzx v4, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r5) +; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <1 x i8>, ptr %a, align 4 + %bc1 = bitcast <1 x i8> %0 to i8 + %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 + %1 = load <1 x i8>, ptr %b, align 8 + %bc2 = bitcast <1 x i8> %1 to i8 + %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 + %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> + %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v16i8_none(<16 x i8> %a, i8 %b) { +; CHECK-LE-P8-LABEL: test_v16i8_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v4, r5 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-LE-P9-NEXT: vinsertb v2, v3, 15 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P9-NEXT: vinsertb v2, v3, 0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: vinsertb v2, v3, 0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-32-P9-NEXT: vinsertb v2, v3, 0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %vecins = insertelement <16 x i8> %a, i8 %b, i32 0 + ret <16 x i8> %vecins +} + +define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <16 x i8>, ptr %b, align 4 + %rhs = insertelement <16 x i8> undef, i8 %arg, i32 0 + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v16i8_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrd v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <16 x i8>, ptr %b, align 4 + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v4, r5 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-LE-P9-NEXT: vinserth v2, v3, 14 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P9-NEXT: vinserth v2, v3, 0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: vinserth v2, v3, 0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-32-P9-NEXT: vinserth v2, v3, 0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %vecins = insertelement <8 x i16> %a, i16 %b, i32 0 + ret <8 x i16> %vecins +} + +define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) { +; CHECK-LE-P8-LABEL: test_v16i8_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws v3, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrws v3, r4 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v4i32_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: mtvsrws v3, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: mtvsrws v3, r3 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r5 +; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r5 +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r5 +; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %conv = trunc i64 %b to i32 + %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 + %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3 + ret <4 x i32> %vecins2 +} + +define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI10_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI10_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI10_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: addi r4, r5, .LCPI10_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-64-P8-NEXT: ld r5, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i8>, ptr %a, align 4 + %bc1 = bitcast <4 x i8> %0 to i32 + %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 + %1 = load <1 x i8>, ptr %b, align 8 + %bc2 = bitcast <1 x i8> %1 to i8 + %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 + %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> + %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) { +; CHECK-LE-P8-LABEL: test_v16i8_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrd v3, r4 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) { +; CHECK-LE-P8-LABEL: test_v2i64_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r4, r4, 56 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: mtvsrd v2, r4 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r4, r4, 56 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: mtvsrd v2, r4 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_1_2: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI13_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI13_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_1_2: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI13_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_1_2: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_1_2: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_1_2: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_1_2: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_1_2: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_1_2: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <1 x i8>, ptr %a, align 4 + %bc1 = bitcast <1 x i8> %0 to i8 + %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI14_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI14_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r3, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r3 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <1 x i8>, ptr %a, align 4 + %bc1 = bitcast <1 x i8> %0 to i8 + %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI15_0@toc@ha +; CHECK-LE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI15_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-LE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lbzx r4, 0, r4 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: vspltb v2, v2, 7 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: vspltb v3, v3, 7 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-32-P8-NEXT: xxspltw v3, vs0, 1 +; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxvwsx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: vspltb v3, v3, 7 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <8 x i8>, ptr %a, align 4 + %bc1 = bitcast <8 x i8> %0 to i64 + %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 + %1 = load <1 x i8>, ptr %b, align 8 + %bc2 = bitcast <1 x i8> %1 to i8 + %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 + %2 = bitcast <2 x i64> %vecinit3 to <16 x i8> + %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v3, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtvsrd v2, r3 +; CHECK-LE-P9-NEXT: mtvsrd v3, r4 +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI16_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI16_0@toc@l +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI16_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v3, r4 +; CHECK-BE-P9-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI16_0@toc@l +; CHECK-BE-P9-NEXT: lxv v2, 0(r5) +; CHECK-BE-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: ld r5, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r5) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16rhs: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r7 +; CHECK-LE-P8-NEXT: mtfprd f1, r8 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r7 +; CHECK-LE-P9-NEXT: mtvsrws v3, r8 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r7, 48 +; CHECK-BE-P8-NEXT: sldi r4, r8, 32 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r7, 48 +; CHECK-BE-P9-NEXT: mtvsrws v3, r8 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r7 +; CHECK-LE-P8-NEXT: mtfprd f1, r8 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r7 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r8 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r7, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r8 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r7, 48 +; CHECK-BE-P9-NEXT: mtvsrd v3, r8 +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v4i32(i32 %arg, i32 %arg1, <4 x i32> %a, <4 x i32> %b) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrwz v2, r4 +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: vmrgow v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrgow v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrgow v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> %a, i32 %arg, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: mtvsrws v2, r3 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: mtvsrd v3, r4 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws v2, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws v2, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxvwsx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <8 x i8>, ptr %a, align 4 + %bc1 = bitcast <8 x i8> %0 to i64 + %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <2 x i64> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32> %b) { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrglw v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghw v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> %a, i64 %arg, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, vs0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtvsrd v2, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrd v2, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtvsrd v3, r3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: sth r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> + %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 +; CHECK-LE-P8-NEXT: addi r3, r5, .LCPI24_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs2, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, f1 +; CHECK-LE-P8-NEXT: xxswapd v4, vs2 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l +; CHECK-LE-P9-NEXT: xxswapd v2, f0 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r3, r5, .LCPI24_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r4) +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw v3, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i8>, ptr %a, align 4 + %bc1 = bitcast <4 x i8> %0 to i32 + %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 + %1 = load <8 x i8>, ptr %b, align 8 + %bc2 = bitcast <8 x i8> %1 to i64 + %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 + %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> + %3 = bitcast <2 x i64> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll @@ -0,0 +1,1909 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <16 x i8> undef, i8 %arg, i32 0 + %rhs = bitcast <16 x i8> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v16i8_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r4) +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r4) +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs = load <2 x i64>, ptr %b, align 4 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v16i8_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v16i8_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v16i8_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 56 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v16i8_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 56 +; CHECK-BE-P9-NEXT: mtfprd f1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C1(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_v16i8: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v16i8: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v16i8: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r4 +; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v16i8: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtvsrdd v2, r4, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r4, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16 +; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r4, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -48(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs2, -48(r1) +; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 + %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_none_v2i64(ptr nocapture noundef readonly %b, i64 %arg) { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r3) +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-BE-P8-NEXT: mtfprd f0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r3) +; CHECK-BE-P9-NEXT: mtfprd f0, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r6, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v4, v2 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <2 x i64>, ptr %b, align 4 + %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_none(ptr nocapture noundef readonly %b, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxv v2, 0(r3) +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r4 +; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-BE-P8-NEXT: xxspltd v3, vs0, 0 +; CHECK-BE-P8-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxv v2, 0(r3) +; CHECK-BE-P9-NEXT: mtvsrdd v3, r4, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4 +; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v3, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: mtvsrdd v3, r4, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs = load <2 x i64>, ptr %b, align 4 + %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 + %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: sldi r4, r4, 32 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtvsrws vs1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: sldi r3, r3, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r4 +; CHECK-BE-P9-NEXT: mtfprd f0, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C5(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 + %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 + %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprwz f0, r3 +; CHECK-LE-P8-NEXT: mtfprwz f1, r4 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprwz f0, r3 +; CHECK-LE-P9-NEXT: mtfprwz f1, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprwz f0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r3 +; CHECK-BE-P9-NEXT: mtfprwz f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: sldi r4, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-BE-P9-NEXT: sldi r3, r4, 48 +; CHECK-BE-P9-NEXT: mtfprd f1, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3 +; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f1, r4 +; CHECK-LE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs1 +; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: sldi r3, r3, 32 +; CHECK-BE-P8-NEXT: mtfprd f1, r4 +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtvsrws vs0, r3 +; CHECK-BE-P9-NEXT: mtfprd f1, r4 +; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3 +; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -48(r1) +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 + %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> + %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: ld r3, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: xxmrghd v3, vs0, vs1 +; CHECK-LE-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: ld r3, 0(r3) +; CHECK-LE-P9-NEXT: lfd f1, 0(r4) +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: xxmrghd v3, vs1, vs0 +; CHECK-LE-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v3, v2, vs0 +; CHECK-BE-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: lfd f0, 0(r4) +; CHECK-BE-P9-NEXT: xxmrghd v3, v2, vs0 +; CHECK-BE-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-64-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-64-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3) +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -32 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -64 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stw r3, -64(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -48 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs3, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs3, vs2 +; CHECK-AIX-32-P8-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-32-P8-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r5, 4(r3) +; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -64(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -64(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: xxmrghd v3, v2, vs0 +; CHECK-AIX-32-P9-NEXT: vaddudm v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <8 x i8>, ptr %a, align 8 + %bc1 = bitcast <8 x i8> %0 to i64 + %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 + %1 = load <8 x i8>, ptr %b, align 8 + %bc2 = bitcast <8 x i8> %1 to i64 + %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 + %2 = bitcast <2 x i64> %vecinit3 to <2 x i64> + %3 = bitcast <2 x i64> %vecinit6 to <2 x i64> + %shuffle = shufflevector <2 x i64> %2, <2 x i64> %3, <2 x i32> + %4 = add <2 x i64> %shuffle, %2 + ret <2 x i64> %4 +} + +define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtvsrws vs0, r4 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -48 +; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r5, -48(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg1, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 + %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + +define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: mtfprd f0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs1 +; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mtfprd f0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, vs0 +; CHECK-LE-P9-NEXT: mtfprd f0, r4 +; CHECK-LE-P9-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: mtfprd f0, r3 +; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0 +; CHECK-BE-P8-NEXT: mtfprwz f0, r4 +; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r6, r1, -48 +; CHECK-AIX-32-P8-NEXT: sth r5, -48(r1) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1) +; CHECK-AIX-32-P9-NEXT: sth r5, -48(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1 +; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %lhs.tmp = insertelement <2 x i64> undef, i64 %arg1, i32 0 + %lhs = bitcast <2 x i64> %lhs.tmp to <2 x i64> + %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 + %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> + %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> + ret <2 x i64> %shuffle +} + diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll @@ -0,0 +1,1445 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define void @test_none_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: mtvsrd v4, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lxsd v3, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: xxswapd vs0, v2 +; CHECK-LE-P9-NEXT: stfd f0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: stfd f0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: stfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i32>, ptr %a + %tmp1_1 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_1, <2 x i32> + store <2 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v8i16_none(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: mtfprwz f1, r4 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxv vs1, 0(r3) +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <4 x i32>, ptr %a, align 1 + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %1, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P8-NEXT: mffprwz r3, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: li r3, 0 +; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r5) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: mffprwz r4, f0 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: li r3, 0 +; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r5) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mffprwz r5, f0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: li r4, 0 +; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P9-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = extractelement <2 x i32> %vec, i64 0 + %1 = bitcast i32 %0 to <2 x i16> + %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> + %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> + %4 = bitcast <4 x i32> %3 to <8 x i16> + %5 = shufflevector <8 x i16> %4, <8 x i16> %2, <8 x i32> + store <8 x i16> %5, ptr %ptr1, align 16 + ret void +} + +define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r3 +; CHECK-LE-P8-NEXT: mffprwz r3, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, vs1 +; CHECK-LE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: li r3, 0 +; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2 +; CHECK-LE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r5) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-BE-P8-NEXT: mffprwz r4, f0 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: li r3, 0 +; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2 +; CHECK-BE-P9-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r5) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mffprwz r5, f0 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: li r4, 0 +; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4 +; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r4) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = extractelement <2 x i32> %vec, i64 0 + %1 = bitcast i32 %0 to <2 x i16> + %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> + %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> + %4 = bitcast <4 x i32> %3 to <8 x i16> + %5 = shufflevector <8 x i16> %2, <8 x i16> %4, <8 x i32> + store <8 x i16> %5, ptr %ptr1, align 16 + ret void +} + +define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_addr #0 { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v3, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-LE-P9-NEXT: mtfprwz f0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 12 +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v3, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-P9-NEXT: mtfprwz f0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C2(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v5, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v5, v2, v4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +entry: + %0 = load <2 x i32>, ptr %ptr, align 4 + %tmp = insertelement <2 x i32> %vec, i32 %v1, i32 0 + %1 = shufflevector <2 x i32> %0, <2 x i32> %tmp, <4 x i32> + store <4 x i32> %1, ptr undef, align 4 + unreachable +} + +define void @test_v2i64_none() { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3 +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i32>, ptr undef, align 4 + %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> + store <4 x i32> %1, ptr undef, align 4 + ret void +} + +define void @test_v8i16_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: mtfprd f0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-LE-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: mtfprwz f0, r4 +; CHECK-BE-P8-NEXT: mtfprwz f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4 +; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3 +; CHECK-AIX-32-P9-NEXT: lxsihzx f1, 0, r3 +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i8>, ptr %a, align 1 + %tmp1_1 = bitcast <2 x i8> %1 to i16 + %tmp1_2 = insertelement <8 x i16> undef, i16 %tmp1_1, i32 0 + %tmp1_3 = bitcast <8 x i16> %tmp1_2 to <4 x i32> + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_3, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v8i16_v4i32(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v8i16_v2i64(ptr %a) { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 8 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-BE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C4(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load1 = load <4 x i8>, ptr %a + %load2 = load <4 x i8>, ptr %b + %shuffle1 = shufflevector <4 x i8> %load1, <4 x i8> %load2, <8 x i32> + %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> + ret <16 x i8> %shuffle2 +} + +define void @test_v4i32_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v4i32_v2i64(ptr %a) { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, f1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: xxswapd vs1, f1 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, vs1 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr undef, align 8 + %tmp0_1 = bitcast <2 x i16> %0 to i32 + %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0 + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v2i64_v2i64(ptr %a) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r3 +; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: lfd f1, 0(r3) +; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f1, 0, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: lfd f1, 0(r3) +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f1, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: lfd f1, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16 +; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: lfiwzx f2, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs2, 1 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3 +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i32>, ptr undef, align 4 + %1 = load <2 x i32>, ptr %a, align 4 + %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v2i64_v4i32(ptr %a) { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, f1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: xxswapd vs1, f1 +; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: xxsldwi vs1, f1, f1, 1 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr undef, align 8 + %tmp0_1 = bitcast <2 x i16> %0 to i32 + %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0 + %1 = load <2 x i16>, ptr %a, align 4 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp0_2, <4 x i32> %tmp1_2, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} + +define void @test_v2i64_v8i16(ptr %a) { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r4, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: mtfprd f1, r4 +; CHECK-LE-P8-NEXT: xxswapd vs0, f0 +; CHECK-LE-P8-NEXT: xxswapd vs1, vs1 +; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0 +; CHECK-LE-P8-NEXT: xxswapd vs0, vs0 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r3) +; CHECK-LE-P9-NEXT: xxswapd vs0, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0 +; CHECK-LE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r4, 0(r3) +; CHECK-BE-P8-NEXT: lfdx f0, 0, r3 +; CHECK-BE-P8-NEXT: sldi r3, r4, 48 +; CHECK-BE-P8-NEXT: mtfprd f1, r3 +; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfd f0, 0(r3) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-BE-P9-NEXT: stxv vs0, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48 +; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3 +; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1 +; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2 +; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1) +; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr undef, align 1 + %tmp0_1 = bitcast <2 x i8> %0 to i16 + %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 + %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> + %1 = load <2 x i16>, ptr %a, align 8 + %tmp1_1 = bitcast <2 x i16> %1 to i32 + %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 + %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> + store <4 x i32> %2, ptr undef, align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll @@ -0,0 +1,1554 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LE-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE-P9 + +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9 + +define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) { +; CHECK-LE-P8-LABEL: test_none_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load0.tmp = load <2 x i8>, ptr %a0 + %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16 + %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0 + %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16> + %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> + store <8 x i16> %shuff, ptr undef + ret void +} + +define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) { +; CHECK-LE-P8-LABEL: test_v8i16_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v4, r9 +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: mtvsrd v4, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-LE-P9-NEXT: mtvsrwz v4, r9 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-LE-P9-NEXT: vinsertb v2, v4, 15 +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v4, r9 +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r3 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v4, r9 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-BE-P9-NEXT: vinsertb v2, v4, 0 +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C2(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-64-P9-NEXT: vinsertb v2, v4, 0 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.1 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: mtvsrwz v4, r5 +; CHECK-AIX-32-P9-NEXT: vinsertb v2, v4, 0 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load0.tmp = load <2 x i8>, ptr %a0 + %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16 + %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0 + %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 + %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16> + %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> + store <8 x i16> %shuff, ptr undef + ret void +} + +define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 { +; CHECK-LE-P8-LABEL: test_none_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-LE-P8-NEXT: mtvsrd v3, r5 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l +; CHECK-LE-P8-NEXT: xxswapd v2, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stfdx f0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-LE-P9-NEXT: mtvsrd v3, r5 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-LE-P9-NEXT: vperm v3, v3, v3, v4 +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: xxswapd vs0, v2 +; CHECK-LE-P9-NEXT: stfd f0, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-BE-P8-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: stxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-BE-P9-NEXT: mtvsrwz v3, r5 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-BE-P9-NEXT: vperm v3, v3, v3, v4 +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: stxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r5 +; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.1 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: stxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r5 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.1 +; CHECK-AIX-64-P9-NEXT: vperm v3, v3, v3, v4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: stxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3 +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: vmrghh v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, -12(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: stb r5, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vmrghh v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, -12(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lwz r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr %ptr, align 4 + %tmp = insertelement <4 x i8> undef, i8 %v3, i32 0 + %tmp0 = bitcast <4 x i8> %tmp to <2 x i16> + %1 = shufflevector <2 x i16> %0, <2 x i16> %tmp0, <4 x i32> + store <4 x i16> %1, ptr undef, align 4 + ret void +} + +define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_v4i32_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI3_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-BE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-BE-P9-NEXT: lxv v3, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, L..C4(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr %ptr1, align 1 + %1 = load <2 x i16>, ptr %ptr2, align 1 + %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_none_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: xxswapd v4, vs1 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_none_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-LE-P9-NEXT: lxv v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_1@toc@l +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_none_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_none_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-BE-P9-NEXT: lxv v3, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_none_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C6(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_none_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_none_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_none_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %ptr1, align 1 + %1 = load <4 x i32>, ptr %ptr2, align 1 + %bc = trunc <4 x i32> %1 to <4 x i16> + %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %bc, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define void @test_v2i64_none(ptr nocapture readonly %ptr1) { +; CHECK-LE-P8-LABEL: test_v2i64_none: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_none: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_none: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_none: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-BE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-BE-P9-NEXT: lxv v3, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_none: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r4, L..C7(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_none: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_none: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3 +; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_none: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3) +; CHECK-AIX-32-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3) +; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %ptr1, align 1 + %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> undef, <4 x i32> + %1 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %1, ptr undef, align 16 + ret void +} + +define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) { +; CHECK-LE-P8-LABEL: test_v8i16_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lhz r4, 0(r4) +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: mtvsrd v2, r3 +; CHECK-LE-P8-NEXT: mtvsrd v4, r4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: lhz r4, 0(r4) +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5 +; CHECK-BE-P8-NEXT: mtvsrwz v3, r3 +; CHECK-BE-P8-NEXT: mtvsrwz v4, r4 +; CHECK-BE-P8-NEXT: vperm v2, v3, v4, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI6_0@toc@ha +; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI6_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r4) +; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4 +; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r4) +; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5 +; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r4 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C6(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %load1 = load <2 x i8>, ptr %a + %load2 = load <2 x i8>, ptr %b + %shuffle1 = shufflevector <2 x i8> %load1, <2 x i8> %load2, <8 x i32> + %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> + ret <16 x i8> %shuffle2 +} + +define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v8i16_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 4 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v8i16_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v8i16_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v8i16_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v8i16_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_v4i32_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_1@toc@l +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha +; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-BE-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: ld r3, L..C8(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i16>, ptr %ptr1, align 1 + %1 = load <2 x i16>, ptr %ptr2, align 1 + %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v4i32_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4 +; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 4 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v4i32_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, f1 +; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v4i32_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, f0 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v4i32_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v4i32_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C9(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a, align 4 + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> + ret <16 x i8> %shuffle +} + +define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { +; CHECK-LE-P8-LABEL: test_v2i64_v2i64: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI12_0@toc@ha +; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI12_1@toc@ha +; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI12_0@toc@l +; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI12_1@toc@l +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5 +; CHECK-LE-P8-NEXT: xxswapd v4, vs0 +; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P8-NEXT: xxswapd v3, vs0 +; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P8-NEXT: xxswapd vs0, v2 +; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v2i64: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha +; CHECK-LE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l +; CHECK-LE-P9-NEXT: lxv v4, 0(r3) +; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI12_1@toc@ha +; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI12_1@toc@l +; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4 +; CHECK-LE-P9-NEXT: lxv v3, 0(r3) +; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4 +; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3 +; CHECK-LE-P9-NEXT: stxv v2, 0(r3) +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v2i64: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI12_0@toc@ha +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI12_0@toc@l +; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v2i64: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsd v2, 0(r3) +; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l +; CHECK-BE-P9-NEXT: lxv v4, 0(r3) +; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: stxv v2, 0(r3) +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: ld r5, L..C10(r2) # %const.0 +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C10(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3) +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %ptr1, align 1 + %1 = load <4 x i16>, ptr %ptr2, align 1 + %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> + %2 = zext <4 x i16> %shuffle1 to <4 x i32> + store <4 x i32> %2, ptr undef, align 16 + ret void +} + +define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v2i64_v4i32: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P8-NEXT: lfdx f1, 0, r4 +; CHECK-LE-P8-NEXT: xxswapd v2, f0 +; CHECK-LE-P8-NEXT: xxswapd v3, f1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v4i32: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-LE-P9-NEXT: xxswapd v2, f0 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v4i32: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v4i32: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4 +; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lwz r5, L..C11(r2) # %const.0 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5 +; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3 +; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0 +; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4 +; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3) +; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a, align 4 + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr { +; CHECK-LE-P8-LABEL: test_v2i64_v8i16: +; CHECK-LE-P8: # %bb.0: # %entry +; CHECK-LE-P8-NEXT: lhz r3, 0(r3) +; CHECK-LE-P8-NEXT: lfdx f0, 0, r4 +; CHECK-LE-P8-NEXT: mtfprd f1, r3 +; CHECK-LE-P8-NEXT: xxswapd v3, f0 +; CHECK-LE-P8-NEXT: xxswapd v2, vs1 +; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P8-NEXT: blr +; +; CHECK-LE-P9-LABEL: test_v2i64_v8i16: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-LE-P9-NEXT: lfd f0, 0(r4) +; CHECK-LE-P9-NEXT: xxswapd v3, f0 +; CHECK-LE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P8-LABEL: test_v2i64_v8i16: +; CHECK-BE-P8: # %bb.0: # %entry +; CHECK-BE-P8-NEXT: lhz r3, 0(r3) +; CHECK-BE-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-BE-P8-NEXT: sldi r3, r3, 48 +; CHECK-BE-P8-NEXT: mtvsrd v3, r3 +; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-P8-NEXT: blr +; +; CHECK-BE-P9-LABEL: test_v2i64_v8i16: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-BE-P9-NEXT: lxsd v3, 0(r4) +; CHECK-BE-P9-NEXT: vsplth v2, v2, 3 +; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P8: # %bb.0: # %entry +; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r4 +; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48 +; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3 +; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-AIX-64-P8-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3 +; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4) +; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3 +; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-64-P9-NEXT: blr +; +; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P8: # %bb.0: # %entry +; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32 +; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16 +; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3 +; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P8-NEXT: blr +; +; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3) +; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1) +; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4) +; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1) +; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1) +; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-AIX-32-P9-NEXT: blr +entry: + %0 = load <2 x i8>, ptr %a + %bc1 = bitcast <2 x i8> %0 to i16 + %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 + %1 = load <2 x i8>, ptr %b, align 8 + %bc2 = bitcast <2 x i8> %1 to i16 + %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 + %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> + %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> + %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> + ret <16 x i8> %shuffle +}