diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -14972,24 +14972,36 @@ // Example (even elements from first vector): // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, if (Mask[0] < NumElts) - for (int i = 1, e = Mask.size(); i < e; i += 2) + for (int i = 1, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = (ShuffV[i - 1] + NumElts); + } // Example (odd elements from first vector): // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, else - for (int i = 0, e = Mask.size(); i < e; i += 2) + for (int i = 0, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = (ShuffV[i + 1] + NumElts); + } } else { // Example (even elements from first vector): // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> , t1 if (Mask[0] < NumElts) - for (int i = 0, e = Mask.size(); i < e; i += 2) + for (int i = 0, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = ShuffV[i + 1] - NumElts; + } // Example (odd elements from first vector): // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> , t1 else - for (int i = 1, e = Mask.size(); i < e; i += 2) + for (int i = 1, e = Mask.size(); i < e; i += 2) { + if (ShuffV[i] < 0) + continue; ShuffV[i] = ShuffV[i - 1] - NumElts; + } } // If the RHS has undefs, we need to remove them since we may have created diff --git a/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc-shufflevector-combine.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=AIX +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mcpu=pwr10 -verify-machineinstrs \ +; RUN: < %s | FileCheck %s --check-prefix=AIX-32 +; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=LE +; RUN: llc -verify-machineinstrs -mtriple powerpcle-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=LE-32 +; RUN: llc -verify-machineinstrs -mtriple powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=BE +; RUN: llc -verify-machineinstrs -mtriple powerpc-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s \ +; RUN: | FileCheck %s --check-prefix=BE-32 + + +; Function Attrs: nounwind +define dso_local <4 x i16> @shufflevector_combine(<4 x i32> %0) #0 { +; AIX-LABEL: shufflevector_combine: +; AIX: # %bb.0: # %newFuncRoot +; AIX-NEXT: ld 3, L..C0(2) # %const.0 +; AIX-NEXT: xxlxor 36, 36, 36 +; AIX-NEXT: lxv 35, 0(3) +; AIX-NEXT: li 3, 0 +; AIX-NEXT: vperm 2, 4, 2, 3 +; AIX-NEXT: vinsw 2, 3, 8 +; AIX-NEXT: vpkuwum 2, 2, 2 +; AIX-NEXT: blr +; +; AIX-32-LABEL: shufflevector_combine: +; AIX-32: # %bb.0: # %newFuncRoot +; AIX-32-NEXT: lwz 3, L..C0(2) # %const.0 +; AIX-32-NEXT: xxlxor 36, 36, 36 +; AIX-32-NEXT: lxv 35, 0(3) +; AIX-32-NEXT: li 3, 0 +; AIX-32-NEXT: vperm 2, 4, 2, 3 +; AIX-32-NEXT: vinsw 2, 3, 8 +; AIX-32-NEXT: vpkuwum 2, 2, 2 +; AIX-32-NEXT: blr +; +; LE-LABEL: shufflevector_combine: +; LE: # %bb.0: # %newFuncRoot +; LE-NEXT: plxv v3, .LCPI0_0@PCREL(0), 1 +; LE-NEXT: xxlxor v4, v4, v4 +; LE-NEXT: li r3, 0 +; LE-NEXT: vperm v2, v2, v4, v3 +; LE-NEXT: vinsw v2, r3, 4 +; LE-NEXT: vpkuwum v2, v2, v2 +; LE-NEXT: blr +; +; LE-32-LABEL: shufflevector_combine: +; LE-32: # %bb.0: # %newFuncRoot +; LE-32-NEXT: li r3, .LCPI0_0@l +; LE-32-NEXT: lis r4, .LCPI0_0@ha +; LE-32-NEXT: xxlxor v4, v4, v4 +; LE-32-NEXT: lxvx v3, r4, r3 +; LE-32-NEXT: li r3, 0 +; LE-32-NEXT: vperm v2, v2, v4, v3 +; LE-32-NEXT: vinsw v2, r3, 4 +; LE-32-NEXT: vpkuwum v2, v2, v2 +; LE-32-NEXT: blr +; +; BE-LABEL: shufflevector_combine: +; BE: # %bb.0: # %newFuncRoot +; BE-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; BE-NEXT: xxlxor v4, v4, v4 +; BE-NEXT: addi r3, r3, .LCPI0_0@toc@l +; BE-NEXT: lxv v3, 0(r3) +; BE-NEXT: li r3, 0 +; BE-NEXT: vperm v2, v4, v2, v3 +; BE-NEXT: vinsw v2, r3, 8 +; BE-NEXT: vpkuwum v2, v2, v2 +; BE-NEXT: blr +; +; BE-32-LABEL: shufflevector_combine: +; BE-32: # %bb.0: # %newFuncRoot +; BE-32-NEXT: li r3, .LCPI0_0@l +; BE-32-NEXT: lis r4, .LCPI0_0@ha +; BE-32-NEXT: xxlxor v4, v4, v4 +; BE-32-NEXT: lxvx v3, r4, r3 +; BE-32-NEXT: li r3, 0 +; BE-32-NEXT: vperm v2, v4, v2, v3 +; BE-32-NEXT: vinsw v2, r3, 8 +; BE-32-NEXT: vpkuwum v2, v2, v2 +; BE-32-NEXT: blr +newFuncRoot: + %1 = shufflevector <4 x i32> zeroinitializer, <4 x i32> %0, <4 x i32> + %2 = insertelement <4 x i32> %1, i32 0, i64 2 + %3 = trunc <4 x i32> %2 to <4 x i16> + ret <4 x i16> %3 +} + +attributes #0 = { nounwind }