diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -664,15 +664,15 @@ Src, DemandedBits, DemandedElts, DAG, Depth + 1)) return DAG.getBitcast(DstVT, V); - // TODO - bigendian once we have test coverage. - if (IsLE && SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) { + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -2166,15 +2166,15 @@ // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. - // TODO - bigendian once we have test coverage. - if (IsLE && SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) { + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -2193,6 +2193,7 @@ KnownSrcBits, TLO, Depth + 1)) return true; } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) { + // TODO - bigendian once we have test coverage. unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -337,15 +337,14 @@ ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: xxlxor v3, v3, v3 -; CHECK-BE-NEXT: vmrglh v3, v3, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v2 +; CHECK-BE-NEXT: vmrghh v3, v2, v2 +; CHECK-BE-NEXT: vmrglh v2, v2, v2 ; CHECK-BE-NEXT: vextsh2w v3, v3 ; CHECK-BE-NEXT: vextsh2w v2, v2 ; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs1, v2 -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: blr entry: %0 = sitofp <8 x i16> %a to <8 x float> @@ -409,25 +408,24 @@ ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv v2, 16(r4) ; CHECK-BE-NEXT: lxv v3, 0(r4) -; CHECK-BE-NEXT: xxlxor v4, v4, v4 -; CHECK-BE-NEXT: vmrglh v5, v4, v3 -; CHECK-BE-NEXT: vmrglh v4, v4, v2 -; CHECK-BE-NEXT: vmrghh v3, v3, v3 -; CHECK-BE-NEXT: vmrghh v2, v2, v2 -; CHECK-BE-NEXT: vextsh2w v5, v5 +; CHECK-BE-NEXT: lxv v2, 16(r4) +; CHECK-BE-NEXT: vmrghh v4, v3, v3 +; CHECK-BE-NEXT: vmrglh v3, v3, v3 +; CHECK-BE-NEXT: vextsh2w v3, v3 ; CHECK-BE-NEXT: vextsh2w v4, v4 +; CHECK-BE-NEXT: xvcvsxwsp vs1, v3 +; CHECK-BE-NEXT: vmrghh v3, v2, v2 +; CHECK-BE-NEXT: vmrglh v2, v2, v2 +; CHECK-BE-NEXT: xvcvsxwsp vs0, v4 ; CHECK-BE-NEXT: vextsh2w v3, v3 ; CHECK-BE-NEXT: vextsh2w v2, v2 -; CHECK-BE-NEXT: xvcvsxwsp vs0, v5 -; CHECK-BE-NEXT: xvcvsxwsp vs1, v4 ; CHECK-BE-NEXT: xvcvsxwsp vs2, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs3, v2 -; CHECK-BE-NEXT: stxv vs1, 48(r3) -; CHECK-BE-NEXT: stxv vs3, 32(r3) -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x i16>, <16 x i16>* %0, align 32 diff --git a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll --- a/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll +++ b/llvm/test/CodeGen/PowerPC/vec_extract_p9.ll @@ -190,7 +190,7 @@ ; CHECK-BE-LABEL: test10: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; CHECK-BE-NEXT: vmrghw 3, 3, 2 +; CHECK-BE-NEXT: vmrghw 3, 3, 3 ; CHECK-BE-NEXT: lfs 0, .LCPI9_0@toc@l(3) ; CHECK-BE-NEXT: vmrglw 2, 3, 2 ; CHECK-BE-NEXT: xsadddp 1, 34, 0