# Patch summary (annotation only; this text precedes the first file header and is not part of any hunk).
# - LegalizeVectorTypes.cpp: in the else-branch that previously built NewInVT from InVT and NewNumParts,
#   NewInVT is now built from the value type of N's operand 0 (OrigInVT), and NewNumParts is recomputed as
#   WidenSize / OrigInVT.getSizeInBits(). Per the added in-code comment, using the promoted scalar type
#   would place the desired bits in the least significant byte(s) of element zero on big endian systems.
# - vec_conv_i8_to_fp32_elts.ll and vec_conv_i8_to_fp64_elts.ll: the expected instructions for test2elt and
#   test2elt_signed change from mtvsrws / mtvsrwz / mtfprwz to sequences using mtfprd / mtvsrd.
# - widen-vec-correctly-be.ll: new test run for powerpc64 with -mcpu=pwr7 (CHECK-BE) and -mcpu=pwr9 (CHECK-P9-BE).
# NOTE(review): this copy of the patch has lost its line breaks, and the vector constant operands of the
#   shufflevector/select instructions in the new test appear to be missing (for example "<4 x i32>" is
#   followed by no mask) - restore from the upstream commit before attempting to apply.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4863,7 +4863,16 @@ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { - NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts); + // For big endian systems, using the promoted input scalar type + // to produce the scalar_to_vector would put the desired bits into + // the least significant byte(s) of the wider element zero. This + // will mean that the users of the result vector are using incorrect + // bits. Use the original input type instead. Although either input + // type can be used on little endian systems, for consistency we + // use the original type there as well. + EVT OrigInVT = N->getOperand(0).getValueType(); + NewNumParts = WidenSize / OrigInVT.getSizeInBits(); + NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts); } if (TLI.isTypeLegal(NewInVT)) { diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -31,7 +31,8 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrws v2, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vextractub v3, v2, 15 ; CHECK-P9-NEXT: vextractub v2, v2, 14 ; CHECK-P9-NEXT: xscvuxdsp f0, v3 @@ -44,7 +45,8 @@ ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: vextractub v3, v2, 2 ; CHECK-BE-NEXT: vextractub v2, v2, 0 ; CHECK-BE-NEXT: xscvuxdsp f0, v3 @@ -293,7 +295,8 @@ ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # 
%entry -; CHECK-P9-NEXT: mtvsrws v2, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 +; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vextractub v3, v2, 15 ; CHECK-P9-NEXT: vextractub v2, v2, 14 ; CHECK-P9-NEXT: vextsh2d v3, v3 @@ -308,7 +311,8 @@ ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: vextractub v3, v2, 2 ; CHECK-BE-NEXT: vextractub v2, v2, 0 ; CHECK-BE-NEXT: vextsh2d v3, v3 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -13,7 +13,7 @@ ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-P8-NEXT: mtvsrwz v2, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 @@ -24,7 +24,7 @@ ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtfprwz f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha ; CHECK-P9-NEXT: xxlxor vs2, vs2, vs2 ; CHECK-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l @@ -415,7 +415,7 @@ ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-P8-NEXT: mtvsrwz v3, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha ; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l @@ -431,7 +431,7 @@ ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrwz v2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l ; CHECK-P9-NEXT: lxv vs0, 0(r3) diff --git 
a/llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll b/llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-P9-BE +define void @test() local_unnamed_addr #0 align 2 { +; CHECK-BE-LABEL: test: +; CHECK-BE: # %bb.0: # %bb +; CHECK-BE-NEXT: vspltisw v2, -16 +; CHECK-BE-NEXT: lhz r3, 0(r3) +; CHECK-BE-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-BE-NEXT: addi r3, r3, 1 +; CHECK-BE-NEXT: vsrw v2, v2, v2 +; CHECK-BE-NEXT: sth r3, -32(r1) +; CHECK-BE-NEXT: addi r3, r1, -32 +; CHECK-BE-NEXT: lxvw4x vs0, 0, r3 +; CHECK-BE-NEXT: addi r3, r1, -16 +; CHECK-BE-NEXT: xxsel vs0, vs0, vs1, v2 +; CHECK-BE-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-NEXT: lwz r3, -16(r1) +; CHECK-BE-NEXT: stw r3, 0(r3) +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_1: # %bb9 +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: b .LBB0_1 +; +; CHECK-P9-BE-LABEL: test: +; CHECK-P9-BE: # %bb.0: # %bb +; CHECK-P9-BE-NEXT: lhz r3, 0(r3) +; CHECK-P9-BE-NEXT: vspltisw v2, -16 +; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-BE-NEXT: addi r3, r3, 1 +; CHECK-P9-BE-NEXT: vsrw v2, v2, v2 +; CHECK-P9-BE-NEXT: sldi r3, r3, 48 +; CHECK-P9-BE-NEXT: mtfprd f1, r3 +; CHECK-P9-BE-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-P9-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: .p2align 4 +; CHECK-P9-BE-NEXT: .LBB0_1: # %bb9 +; CHECK-P9-BE-NEXT: # +; CHECK-P9-BE-NEXT: b .LBB0_1 +bb: + br i1 false, label %bb1, label %bb2 + +bb1: ; preds = %bb + 
unreachable + +bb2: ; preds = %bb + %i = load i32, ptr poison, align 4 + %i3 = trunc i32 %i to i16 + %i4 = add i16 %i3, 1 + %i5 = bitcast i16 %i4 to <2 x i8> + %i6 = shufflevector <2 x i8> %i5, <2 x i8> poison, <4 x i32> + %i7 = select <4 x i1> , <4 x i8> %i6, <4 x i8> undef + %i8 = select <4 x i1> , <4 x i8> , <4 x i8> %i7 + br label %bb9 + +bb9: ; preds = %bb9, %bb2 + %i10 = phi <4 x i8> [ %i8, %bb2 ], [ poison, %bb9 ] + %i11 = bitcast <4 x i8> %i10 to i32 + store i32 %i11, ptr poison, align 2 + br label %bb9 +}