diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9772,8 +9772,11 @@ // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx. if (RHS->getOpcode() != ISD::BUILD_VECTOR) { std::swap(LHS, RHS); - VecShuffle = DAG.getCommutedVectorShuffle(*SVN); - ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask(); + VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN)); + ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle); + if (!CommutedSV) + return SDValue(); + ShuffleMask = CommutedSV->getMask(); } // Ensure that the RHS is a vector of constants. diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll --- a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll @@ -8,6 +8,12 @@ ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ ; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ ; RUN: FileCheck %s --check-prefix=CHECK-AIX +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX-32 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32 declare hidden i32 @call1() define hidden void @function1() { @@ -49,6 +55,37 @@ ; CHECK-AIX-NEXT: ld r0, 16(r1) ; CHECK-AIX-NEXT: mtlr r0 ; CHECK-AIX-NEXT: blr +; +; CHECK-LINUX-32-LABEL: function1: +; CHECK-LINUX-32: # %bb.0: # %entry +; CHECK-LINUX-32-NEXT: mflr r0 +; CHECK-LINUX-32-NEXT: stw r0, 4(r1) +; CHECK-LINUX-32-NEXT: stwu r1, -48(r1) +; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 48 +; CHECK-LINUX-32-NEXT: .cfi_offset lr, 4 +; CHECK-LINUX-32-NEXT: bl call1 +; CHECK-LINUX-32-NEXT: li r4, 0 +; 
CHECK-LINUX-32-NEXT: stw r3, 16(r1) +; CHECK-LINUX-32-NEXT: stw r4, 32(r1) +; CHECK-LINUX-32-NEXT: lwz r0, 52(r1) +; CHECK-LINUX-32-NEXT: addi r1, r1, 48 +; CHECK-LINUX-32-NEXT: mtlr r0 +; CHECK-LINUX-32-NEXT: blr +; +; CHECK-AIX-32-LABEL: function1: +; CHECK-AIX-32: # %bb.0: # %entry +; CHECK-AIX-32-NEXT: mflr r0 +; CHECK-AIX-32-NEXT: stw r0, 8(r1) +; CHECK-AIX-32-NEXT: stwu r1, -96(r1) +; CHECK-AIX-32-NEXT: bl .call1[PR] +; CHECK-AIX-32-NEXT: nop +; CHECK-AIX-32-NEXT: li r4, 0 +; CHECK-AIX-32-NEXT: stw r3, 64(r1) +; CHECK-AIX-32-NEXT: stw r4, 80(r1) +; CHECK-AIX-32-NEXT: addi r1, r1, 96 +; CHECK-AIX-32-NEXT: lwz r0, 8(r1) +; CHECK-AIX-32-NEXT: mtlr r0 +; CHECK-AIX-32-NEXT: blr entry: %tailcall1 = tail call i32 @call1() %0 = insertelement <4 x i32> poison, i32 %tailcall1, i64 1