diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9770,8 +9770,11 @@ // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx. if (RHS->getOpcode() != ISD::BUILD_VECTOR) { std::swap(LHS, RHS); - VecShuffle = DAG.getCommutedVectorShuffle(*SVN); - ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask(); + VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN)); + ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle); + if (!CommutedSV) + return SDValue(); + ShuffleMask = CommutedSV->getMask(); } // Ensure that the RHS is a vector of constants. diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/p10-splatImm32-undef.ll @@ -0,0 +1,121 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX +; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-LINUX-32 +; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-AIX-32 + +declare hidden i32 @call1() +define hidden void @function1() { +; CHECK-LINUX-LABEL: function1: +; CHECK-LINUX: # %bb.0: # %entry +; CHECK-LINUX-NEXT: mflr r0 +; CHECK-LINUX-NEXT: std r0, 16(r1) +; CHECK-LINUX-NEXT: stdu r1, -32(r1) +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 32 +; 
CHECK-LINUX-NEXT: .cfi_offset lr, 16 +; CHECK-LINUX-NEXT: bl call1@notoc +; CHECK-LINUX-NEXT: addi r1, r1, 32 +; CHECK-LINUX-NEXT: ld r0, 16(r1) +; CHECK-LINUX-NEXT: mtlr r0 +; CHECK-LINUX-NEXT: blr +; +; CHECK-AIX-LABEL: function1: +; CHECK-AIX: # %bb.0: # %entry +; CHECK-AIX-NEXT: mflr r0 +; CHECK-AIX-NEXT: std r0, 16(r1) +; CHECK-AIX-NEXT: stdu r1, -304(r1) +; CHECK-AIX-NEXT: stxv vs52, 112(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs53, 128(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs54, 144(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs55, 160(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs56, 176(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs57, 192(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs58, 208(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs59, 224(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs60, 240(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs61, 256(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs62, 272(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: stxv vs63, 288(r1) # 16-byte Folded Spill +; CHECK-AIX-NEXT: bl .call1[PR] +; CHECK-AIX-NEXT: nop +; CHECK-AIX-NEXT: lxv vs63, 288(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs62, 272(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs61, 256(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs60, 240(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs59, 224(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs58, 208(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs57, 192(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs56, 176(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs55, 160(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs54, 144(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs53, 128(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: lxv vs52, 112(r1) # 16-byte Folded Reload +; CHECK-AIX-NEXT: addi r1, r1, 304 +; CHECK-AIX-NEXT: ld r0, 16(r1) +; CHECK-AIX-NEXT: mtlr r0 +; CHECK-AIX-NEXT: 
blr +; +; CHECK-LINUX-32-LABEL: function1: +; CHECK-LINUX-32: # %bb.0: # %entry +; CHECK-LINUX-32-NEXT: mflr r0 +; CHECK-LINUX-32-NEXT: stw r0, 4(r1) +; CHECK-LINUX-32-NEXT: stwu r1, -48(r1) +; CHECK-LINUX-32-NEXT: .cfi_def_cfa_offset 48 +; CHECK-LINUX-32-NEXT: .cfi_offset lr, 4 +; CHECK-LINUX-32-NEXT: bl call1 +; CHECK-LINUX-32-NEXT: li r4, 0 +; CHECK-LINUX-32-NEXT: stw r3, 16(r1) +; CHECK-LINUX-32-NEXT: stw r4, 32(r1) +; CHECK-LINUX-32-NEXT: lwz r0, 52(r1) +; CHECK-LINUX-32-NEXT: addi r1, r1, 48 +; CHECK-LINUX-32-NEXT: mtlr r0 +; CHECK-LINUX-32-NEXT: blr +; +; CHECK-AIX-32-LABEL: function1: +; CHECK-AIX-32: # %bb.0: # %entry +; CHECK-AIX-32-NEXT: mflr r0 +; CHECK-AIX-32-NEXT: stw r0, 8(r1) +; CHECK-AIX-32-NEXT: stwu r1, -96(r1) +; CHECK-AIX-32-NEXT: bl .call1[PR] +; CHECK-AIX-32-NEXT: nop +; CHECK-AIX-32-NEXT: li r4, 0 +; CHECK-AIX-32-NEXT: stw r3, 64(r1) +; CHECK-AIX-32-NEXT: stw r4, 80(r1) +; CHECK-AIX-32-NEXT: addi r1, r1, 96 +; CHECK-AIX-32-NEXT: lwz r0, 8(r1) +; CHECK-AIX-32-NEXT: mtlr r0 +; CHECK-AIX-32-NEXT: blr +entry: + %tailcall1 = tail call i32 @call1() + %0 = insertelement <4 x i32> poison, i32 %tailcall1, i64 1 + %1 = insertelement <4 x i32> %0, i32 0, i64 2 + %2 = insertelement <4 x i32> %1, i32 0, i64 3 + %3 = trunc <4 x i32> %2 to <4 x i8> + %4 = icmp eq <4 x i8> %3, zeroinitializer + %5 = shufflevector <4 x i1> %4, <4 x i1> poison, <2 x i32> + %6 = shufflevector <4 x i1> %4, <4 x i1> poison, <2 x i32> + %7 = xor <2 x i1> %5, + %8 = shufflevector <2 x i1> %7, <2 x i1> poison, <2 x i32> zeroinitializer + %9 = zext <2 x i1> %8 to <2 x i64> + %10 = xor <2 x i1> %6, + %11 = shufflevector <2 x i1> %10, <2 x i1> poison, <2 x i32> zeroinitializer + %12 = zext <2 x i1> %11 to <2 x i64> + br label %next_block + +next_block: + %13 = add <2 x i64> zeroinitializer, %9 + %14 = add <2 x i64> zeroinitializer, %12 + %shift704 = shufflevector <2 x i64> %13, <2 x i64> poison, <2 x i32> + %15 = add <2 x i64> %shift704, %13 + %shift705 = shufflevector <2 x i64> %14, 
<2 x i64> poison, <2 x i32> + %16 = add <2 x i64> %shift705, %14 + ret void +}