diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4852,6 +4852,15 @@ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { + // For big endian systems, using the promoted input scalar type + // to produce the scalar_to_vector would put the desired bits into + // the least significant byte(s) of the wider element zero. This + // will mean that the users of the result vector are using incorrect + // bits. Use the original input type instead. + if (DAG.getDataLayout().isBigEndian()) { + InVT = N->getOperand(0).getValueType(); + NewNumElts = WidenSize / InVT.getSizeInBits(); + } NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -44,7 +44,8 @@ ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: vextractub v3, v2, 2 ; CHECK-BE-NEXT: vextractub v2, v2, 0 ; CHECK-BE-NEXT: xscvuxdsp f0, v3 @@ -308,7 +309,8 @@ ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrws v2, r3 +; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: vextractub v3, v2, 2 ; CHECK-BE-NEXT: vextractub v2, v2, 0 ; CHECK-BE-NEXT: vextsh2d v3, v3 diff --git a/llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll b/llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/widen-vec-correctly-be.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-BE +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-P9-BE +define void @test() local_unnamed_addr #0 align 2 { ; big-endian powerpc64 codegen test, checked under -mcpu=pwr7 and -mcpu=pwr9 +; CHECK-BE-LABEL: test: +; CHECK-BE: # %bb.0: # %bb +; CHECK-BE-NEXT: bc 4, 4*cr5+lt, .LBB0_3 +; CHECK-BE-NEXT: # %bb.1: # %bb2 +; CHECK-BE-NEXT: vspltisw v2, -16 +; CHECK-BE-NEXT: lhz r3, 0(r3) +; CHECK-BE-NEXT: xxlxor vs1, vs1, vs1 +; CHECK-BE-NEXT: addi r3, r3, 1 +; CHECK-BE-NEXT: vsrw v2, v2, v2 +; CHECK-BE-NEXT: sth r3, -32(r1) +; CHECK-BE-NEXT: addi r3, r1, -32 +; CHECK-BE-NEXT: lxvw4x vs0, 0, r3 +; CHECK-BE-NEXT: addi r3, r1, -16 +; CHECK-BE-NEXT: xxsel vs0, vs0, vs1, v2 +; CHECK-BE-NEXT: stxvw4x vs0, 0, r3 +; CHECK-BE-NEXT: lwz r3, -16(r1) +; CHECK-BE-NEXT: stw r3, 0(r3) +; CHECK-BE-NEXT: .p2align 4 +; CHECK-BE-NEXT: .LBB0_2: # %bb9 +; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: b .LBB0_2 +; CHECK-BE-NEXT: .LBB0_3: # %bb1 +; +; CHECK-P9-BE-LABEL: test: +; CHECK-P9-BE: # %bb.0: # %bb +; CHECK-P9-BE-NEXT: bc 4, 4*cr5+lt, .LBB0_3 +; CHECK-P9-BE-NEXT: # %bb.1: # %bb2 +; CHECK-P9-BE-NEXT: lhz r3, 0(r3) +; CHECK-P9-BE-NEXT: vspltisw v2, -16 +; CHECK-P9-BE-NEXT: xxlxor vs0, vs0, vs0 +; CHECK-P9-BE-NEXT: addi r3, r3, 1 +; CHECK-P9-BE-NEXT: vsrw v2, v2, v2 +; CHECK-P9-BE-NEXT: sldi r3, r3, 48 +; CHECK-P9-BE-NEXT: mtfprd f1, r3 +; CHECK-P9-BE-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-P9-BE-NEXT: xxsldwi vs0, v2, v2, 3 +; CHECK-P9-BE-NEXT: stfiwx f0, 0, r3 +; CHECK-P9-BE-NEXT: .p2align 4 +; CHECK-P9-BE-NEXT: .LBB0_2: # %bb9 +; CHECK-P9-BE-NEXT: # +; CHECK-P9-BE-NEXT: b .LBB0_2 +; CHECK-P9-BE-NEXT: .LBB0_3: # %bb1 +bb: + br i1 poison, label %bb1, label %bb2 + +bb1: ; preds = %bb + unreachable + +bb2: ; preds = %bb + %i = 
load i32, ptr poison, align 4 + %i3 = trunc i32 %i to i16 ; keep only the low 16 bits of the loaded word + %i4 = add i16 %i3, 1 + %i5 = bitcast i16 %i4 to <2 x i8> ; view the i16 scalar as two i8 lanes. NOTE(review): presumably the scalar-to-vector bitcast this test exists to cover; confirm + %i6 = shufflevector <2 x i8> %i5, <2 x i8> poison, <4 x i32> ; NOTE(review): the constant vector operands here and in the two selects below look stripped from this copy; restore them from the upstream test + %i7 = select <4 x i1> , <4 x i8> %i6, <4 x i8> undef + %i8 = select <4 x i1> , <4 x i8> , <4 x i8> %i7 + br label %bb9 + +bb9: ; preds = %bb9, %bb2 + %i10 = phi <4 x i8> [ %i8, %bb2 ], [ poison, %bb9 ] + %i11 = bitcast <4 x i8> %i10 to i32 ; pack the four i8 lanes back into one i32 for the store + store i32 %i11, ptr poison, align 2 + br label %bb9 ; bb9 branches back to itself unconditionally +}