diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1820,7 +1820,11 @@ } /// This input value (which is known to have vector type) is being zero extended -/// or truncated to the specified vector type. +/// or truncated to the specified vector type. Since the zext/trunc is done +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and +/// endianness will impact which end of the vector is extended or +/// truncated. +/// /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. @@ -1850,25 +1854,36 @@ SmallVector ShuffleMask; Value *V2; - if (SrcTy->getNumElements() > DestTy->getNumElements()) { - // If we're shrinking the number of elements, just shuffle in the low + bool IsBigEndian = IC.getDataLayout().isBigEndian(); + unsigned SrcElts = SrcTy->getNumElements(); + unsigned DestElts = DestTy->getNumElements(); + + if (SrcElts > DestElts) { + // If we're shrinking the number of elements, just shuffle in the low/high // elements from the input and use undef as the second shuffle input. V2 = UndefValue::get(SrcTy); - for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(i); + unsigned FirstElt = IsBigEndian ? SrcElts - DestElts : 0; + for (unsigned i = 0, e = DestElts; i != e; ++i) + ShuffleMask.push_back(FirstElt + i); } else { // If we're increasing the number of elements, shuffle in all of the // elements from InVal and fill the rest of the result elements with zeros // from a constant zero. + + // The excess elements reference the first element of the zero input (V2).
V2 = Constant::getNullValue(SrcTy); - unsigned SrcElts = SrcTy->getNumElements(); + + if (IsBigEndian) + for (unsigned i = 0, e = DestElts - SrcElts; i != e; ++i) + ShuffleMask.push_back(SrcElts); + for (unsigned i = 0, e = SrcElts; i != e; ++i) ShuffleMask.push_back(i); - // The excess elements reference the first element of the zero input. - for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) - ShuffleMask.push_back(SrcElts); + if (!IsBigEndian) + for (unsigned i = 0, e = DestElts - SrcElts; i != e; ++i) + ShuffleMask.push_back(SrcElts); } return new ShuffleVectorInst(InVal, V2, diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -824,7 +824,7 @@ define <3 x i32> @test60(<4 x i32> %call4) { ; CHECK-LABEL: @test60( -; CHECK-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; CHECK-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[P10]] ; %p11 = bitcast <4 x i32> %call4 to i128 @@ -836,7 +836,7 @@ define <4 x i32> @test61(<3 x i32> %call4) { ; CHECK-LABEL: @test61( -; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x i32> %call4 to i96 @@ -848,7 +848,7 @@ define <4 x i32> @test62(<3 x float> %call4) { ; CHECK-LABEL: @test62( ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> -; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; CHECK-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x float> %call4 to i96