diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14889,6 +14889,17 @@ SDValue SToVLHS = isScalarToVec(LHS); SDValue SToVRHS = isScalarToVec(RHS); if (SToVLHS || SToVRHS) { + // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the + // same type and have differing element sizes, then do not perform + // the following transformation. The current transformation for + // SCALAR_TO_VECTOR assumes that both input vectors have the same + // element size. This will be updated in the future to account for + // differing sizes of the LHS and RHS. + if (SToVLHS && SToVRHS && + (SToVLHS.getValueType().getScalarSizeInBits() != + SToVRHS.getValueType().getScalarSizeInBits())) + return Res; + int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements() : SToVRHS.getValueType().getVectorNumElements(); int NumEltsOut = ShuffV.size(); diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -170,7 +170,7 @@ ; ; CHECK-AIX-LABEL: buildd: ; CHECK-AIX: # %bb.0: # %entry -; CHECK-AIX-NEXT: ld 3, L..C0(2) +; CHECK-AIX-NEXT: ld 3, L..C0(2) # @d ; CHECK-AIX-NEXT: lxvdsx 34, 0, 3 ; CHECK-AIX-NEXT: blr entry: @@ -2567,9 +2567,11 @@ ; ; CHECK-LE-LABEL: buildi2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mtfprwz f0, r4 +; CHECK-LE-NEXT: mtfprd f0, r4 ; CHECK-LE-NEXT: mtfprd f1, r3 -; CHECK-LE-NEXT: xxmrgld v2, vs1, vs0 +; CHECK-LE-NEXT: xxswapd vs0, vs0 +; CHECK-LE-NEXT: xxswapd v2, vs1 +; CHECK-LE-NEXT: xxmrgld v2, v2, vs0 ; CHECK-LE-NEXT: blr ; ; CHECK-AIX-LABEL: buildi2: diff --git a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll new file mode 
100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+; Shuffles bytes from two <16 x i8> values that are bitcasts of an i32
+; inserted into <4 x i32> and an i64 inserted into <2 x i64>, i.e. the two
+; shuffle inputs come from vectors with different scalar sizes (32 vs. 64 bits).
+; NOTE(review): the shufflevector mask operand was missing from the reviewed
+; copy of this patch; the mask below is a reconstruction -- confirm it against
+; the intended test and regenerate the CHECK lines if it differs.
+define <16 x i8> @test_4_8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_4_8:
+; CHECK-LE-P8:       # %bb.0: # %entry
+; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
+; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT:    addi r3, r5, .LCPI0_0@toc@l
+; CHECK-LE-P8-NEXT:    lvx v4, 0, r3
+; CHECK-LE-P8-NEXT:    xxswapd v2, f0
+; CHECK-LE-P8-NEXT:    xxswapd v3, f1
+; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT:    blr
+;
+; CHECK-LE-P9-LABEL: test_4_8:
+; CHECK-LE-P9:       # %bb.0: # %entry
+; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-LE-P9-NEXT:    xxswapd v2, f0
+; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
+; CHECK-LE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT:    xxswapd v3, f0
+; CHECK-LE-P9-NEXT:    vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT:    blr
+;
+; CHECK-BE-P8-LABEL: test_4_8:
+; CHECK-BE-P8:       # %bb.0: # %entry
+; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT:    addi r3, r5, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
+; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT:    blr
+;
+; CHECK-BE-P9-LABEL: test_4_8:
+; CHECK-BE-P9:       # %bb.0: # %entry
+; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
+; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT:    xxsldwi v2, f0, f0, 1
+; CHECK-BE-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT:    blr
+;
+; CHECK-AIX-64-P8-LABEL: test_4_8:
+; CHECK-AIX-64-P8:       # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT:    ld r5, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT:    blr
+;
+; CHECK-AIX-64-P9-LABEL: test_4_8:
+; CHECK-AIX-64-P9:       # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT:    ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT:    xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT:    blr
+;
+; CHECK-AIX-32-P8-LABEL: test_4_8:
+; CHECK-AIX-32-P8:       # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r4)
+; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT:    xxmrghw v3, vs1, vs0
+; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT:    blr
+;
+; CHECK-AIX-32-P9-LABEL: test_4_8:
+; CHECK-AIX-32-P9:       # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
+; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT:    lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT:    xxmrghw v3, vs1, vs0
+; CHECK-AIX-32-P9-NEXT:    vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT:    blr
+entry:
+  %0 = load <4 x i8>, ptr %a, align 4
+  %bc1 = bitcast <4 x i8> %0 to i32
+  %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0
+  %1 = load <8 x i8>, ptr %b, align 8
+  %bc2 = bitcast <8 x i8> %1 to i64
+  %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0
+  %2 = bitcast <4 x i32> %vecinit3 to <16 x i8>
+  %3 = bitcast <2 x i64> %vecinit6 to <16 x i8>
+  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef>
+  ret <16 x i8> %shuffle
+}