diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1820,7 +1820,11 @@ } /// This input value (which is known to have vector type) is being zero extended -/// or truncated to the specified vector type. +/// or truncated to the specified vector type. Since the zext/trunc is done +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, so +/// endianness determines which end of the vector is extended or +/// truncated. +/// /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. @@ -1850,25 +1854,42 @@ SmallVector ShuffleMask; Value *V2; - if (SrcTy->getNumElements() > DestTy->getNumElements()) { - // If we're shrinking the number of elements, just shuffle in the low - // elements from the input and use undef as the second shuffle input. + bool IsBigEndian = IC.getDataLayout().isBigEndian(); + unsigned SrcElts = SrcTy->getNumElements(); + unsigned DestElts = DestTy->getNumElements(); + + if (SrcElts > DestElts) { + // If we're shrinking the number of elements, just shuffle in the elements + // from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); - for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(i); + // For big endian we should pick elements with high indices, for little + // endian from low indices. + unsigned FirstElt = IsBigEndian ? SrcElts - DestElts : 0; + for (unsigned i = 0, e = DestElts; i != e; ++i) + ShuffleMask.push_back(FirstElt + i); } else { // If we're increasing the number of elements, shuffle in all of the - // elements from InVal and fill the rest of the result elements with zeros - // from a constant zero. + // elements from InVal. 
Fill the rest of the result elements with zeros + // from a constant zero. We use the first element of the zero input. V2 = Constant::getNullValue(SrcTy); - unsigned SrcElts = SrcTy->getNumElements(); + + // Create a helper mask for picking the excess elements from the first element of + // the null vector. + SmallVector ZeroMask; + ZeroMask.assign(DestElts - SrcElts, SrcElts); + + // For big endian the zeroes should be inserted at low indices. + if (IsBigEndian) + ShuffleMask.insert(ShuffleMask.end(), ZeroMask.begin(), ZeroMask.end()); + + // Add the mask for picking values from the source vector <0, 1, 2 ...>. for (unsigned i = 0, e = SrcElts; i != e; ++i) ShuffleMask.push_back(i); - // The excess elements reference the first element of the zero input. - for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) - ShuffleMask.push_back(SrcElts); + // For little endian the zeroes should be inserted at high indices. + if (!IsBigEndian) + ShuffleMask.insert(ShuffleMask.end(), ZeroMask.begin(), ZeroMask.end()); } return new ShuffleVectorInst(InVal, V2, diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -823,9 +823,13 @@ } define <3 x i32> @test60(<4 x i32> %call4) { -; ALL-LABEL: @test60( -; ALL-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> -; ALL-NEXT: ret <3 x i32> [[P10]] +; BE-LABEL: @test60( +; BE-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; BE-NEXT: ret <3 x i32> [[P10]] +; +; LE-LABEL: @test60( +; LE-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; LE-NEXT: ret <3 x i32> [[P10]] ; %p11 = bitcast <4 x i32> %call4 to i128 %p9 = trunc i128 %p11 to i96 @@ -835,9 +839,13 @@ } define <4 x i32> @test61(<3 x i32> %call4) { -; ALL-LABEL: @test61( -; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> 
[[CALL4:%.*]], <3 x i32> , <4 x i32> -; ALL-NEXT: ret <4 x i32> [[P10]] +; BE-LABEL: @test61( +; BE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; BE-NEXT: ret <4 x i32> [[P10]] +; +; LE-LABEL: @test61( +; LE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; LE-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x i32> %call4 to i96 %p9 = zext i96 %p11 to i128 @@ -846,10 +854,15 @@ } define <4 x i32> @test62(<3 x float> %call4) { -; ALL-LABEL: @test62( -; ALL-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> -; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> -; ALL-NEXT: ret <4 x i32> [[P10]] +; BE-LABEL: @test62( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; BE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; BE-NEXT: ret <4 x i32> [[P10]] +; +; LE-LABEL: @test62( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; LE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; LE-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x float> %call4 to i96 %p9 = zext i96 %p11 to i128