diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -944,6 +944,7 @@ if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); if (Subtarget.hasP8Vector()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); @@ -16440,11 +16441,23 @@ return Subtarget.isPPC64() ? PPC::X4 : PPC::R4; } -bool -PPCTargetLowering::shouldExpandBuildVectorWithShuffles( - EVT VT , unsigned DefinedValues) const { - if (VT == MVT::v2i64) - return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves +bool PPCTargetLowering::shouldExpandBuildVectorWithShuffles( + EVT VT, unsigned DefinedValues) const { + if (VT == MVT::v2i64) { + // Don't need stack ops with direct moves. + if (Subtarget.hasDirectMove()) + return true; + + // Only one scalar_to_vector is needed for a splat build_vector. + assert(DefinedValues && "no defined values!\n"); + if (DefinedValues == 1) + return true; + + // After expanding to shuffles, we need one scalar_to_vector per defined + // value, and each scalar_to_vector may need a stack operation, so don't + // expand when there is more than one defined value. 
+ return false; + } if (Subtarget.hasVSX()) return true; diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -682,13 +682,10 @@ ; CHECK-P7-LABEL: no_RAUW_in_combine_during_legalize: ; CHECK-P7: # %bb.0: # %entry ; CHECK-P7-NEXT: sldi r4, r4, 2 -; CHECK-P7-NEXT: addi r5, r1, -16 -; CHECK-P7-NEXT: xxlxor v3, v3, v3 -; CHECK-P7-NEXT: lwzx r3, r3, r4 -; CHECK-P7-NEXT: std r3, -16(r1) -; CHECK-P7-NEXT: lxvd2x vs0, 0, r5 -; CHECK-P7-NEXT: xxswapd v2, vs0 -; CHECK-P7-NEXT: vmrglb v2, v3, v2 +; CHECK-P7-NEXT: xxlxor v2, v2, v2 +; CHECK-P7-NEXT: lfiwzx f0, r3, r4 +; CHECK-P7-NEXT: xxlor v3, vs0, vs0 +; CHECK-P7-NEXT: vmrglb v2, v2, v3 ; CHECK-P7-NEXT: blr entry: %idx.ext = sext i32 %offset to i64 diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -133,7 +133,6 @@ ret void } -; FIXME: use lfiwax for this case at PWR7. define void @test5(<2 x i64>* %a, i32* %in) { ; P9-LABEL: test5: ; P9: # %bb.0: # %entry @@ -151,11 +150,8 @@ ; ; P7-LABEL: test5: ; P7: # %bb.0: # %entry -; P7-NEXT: lwa r4, 0(r4) -; P7-NEXT: addi r5, r1, -16 -; P7-NEXT: std r4, -8(r1) -; P7-NEXT: std r4, -16(r1) -; P7-NEXT: lxvd2x vs0, 0, r5 +; P7-NEXT: lfiwax f0, 0, r4 +; P7-NEXT: xxspltd vs0, vs0, 0 ; P7-NEXT: stxvd2x vs0, 0, r3 ; P7-NEXT: blr entry: @@ -167,7 +163,6 @@ ret void } -; FIXME: use lfiwzx for this case at PWR7. 
define void @test6(<2 x i64>* %a, i32* %in) { ; P9-LABEL: test6: ; P9: # %bb.0: # %entry @@ -185,11 +180,8 @@ ; ; P7-LABEL: test6: ; P7: # %bb.0: # %entry -; P7-NEXT: lwz r4, 0(r4) -; P7-NEXT: addi r5, r1, -16 -; P7-NEXT: std r4, -8(r1) -; P7-NEXT: std r4, -16(r1) -; P7-NEXT: lxvd2x vs0, 0, r5 +; P7-NEXT: lfiwzx f0, 0, r4 +; P7-NEXT: xxspltd vs0, vs0, 0 ; P7-NEXT: stxvd2x vs0, 0, r3 ; P7-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll --- a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll +++ b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll @@ -44,9 +44,10 @@ ; NOVSX-LABEL: increment_by_val: ; NOVSX: # %bb.0: ; NOVSX-NEXT: addi 3, 1, -16 -; NOVSX-NEXT: std 5, -8(1) ; NOVSX-NEXT: std 5, -16(1) ; NOVSX-NEXT: lvx 3, 0, 3 +; NOVSX-NEXT: vsldoi 4, 3, 3, 8 +; NOVSX-NEXT: vsldoi 3, 4, 3, 8 ; NOVSX-NEXT: vaddudm 2, 2, 3 ; NOVSX-NEXT: blr %tmpvec = insertelement <2 x i64> , i64 %val, i32 0 @@ -106,9 +107,10 @@ ; NOVSX-LABEL: decrement_by_val: ; NOVSX: # %bb.0: ; NOVSX-NEXT: addi 3, 1, -16 -; NOVSX-NEXT: std 5, -8(1) ; NOVSX-NEXT: std 5, -16(1) ; NOVSX-NEXT: lvx 3, 0, 3 +; NOVSX-NEXT: vsldoi 4, 3, 3, 8 +; NOVSX-NEXT: vsldoi 3, 4, 3, 8 ; NOVSX-NEXT: vsubudm 2, 2, 3 ; NOVSX-NEXT: blr %tmpvec = insertelement <2 x i64> , i64 %val, i32 0