diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -18,6 +18,7 @@ #include "llvm/IR/DIBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/KnownBits.h" +#include using namespace llvm; using namespace PatternMatch; @@ -1820,12 +1821,24 @@ } /// This input value (which is known to have vector type) is being zero extended -/// or truncated to the specified vector type. +/// or truncated to the specified vector type. Since the zext/trunc is done +/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, +/// endianness will impact which end of the vector that is extended or +/// truncated. +/// +/// A vector is always stored with index 0 at the lowest address, which +/// corresponds to the most significant bits for a big endian stored integer and +/// the least significant bits for little endian. A trunc/zext of an integer +/// impacts the big end of the integer. Thus, we need to add/remove elements at +/// the front of the vector for big endian targets, and the back of the vector +/// for little endian targets. +/// /// Try to replace it with a shuffle (and vector/vector bitcast) if possible. /// /// The source and destination vector types may have different element types. -static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy, - InstCombiner &IC) { +static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal, + VectorType *DestTy, + InstCombiner &IC) { // We can only do this optimization if the output is a multiple of the input // element size, or the input is a multiple of the output element size. // Convert the input type to have the same element type as the output. @@ -1844,31 +1857,53 @@ InVal = IC.Builder.CreateBitCast(InVal, SrcTy); } + bool IsBigEndian = IC.getDataLayout().isBigEndian(); + unsigned SrcElts = SrcTy->getNumElements(); + unsigned DestElts = DestTy->getNumElements(); + + assert(SrcElts != DestElts && "Element counts should be different."); + // Now that the element types match, get the shuffle mask and RHS of the // shuffle to use, which depends on whether we're increasing or decreasing the // size of the input. - SmallVector ShuffleMask; + SmallVector ShuffleMaskStorage; + ArrayRef ShuffleMask; Value *V2; - if (SrcTy->getNumElements() > DestTy->getNumElements()) { - // If we're shrinking the number of elements, just shuffle in the low - // elements from the input and use undef as the second shuffle input. - V2 = UndefValue::get(SrcTy); - for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i) - ShuffleMask.push_back(i); + // Produce an identify shuffle mask for the src vector. + ShuffleMaskStorage.resize(SrcElts); + std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0); + if (SrcElts > DestElts) { + // If we're shrinking the number of elements (rewriting an integer + // truncate), just shuffle in the elements corresponding to the least + // significant bits from the input and use undef as the second shuffle + // input. + V2 = UndefValue::get(SrcTy); + // Make sure the shuffle mask selects the "least significant bits" by + // keeping elements from back of the src vector for big endian, and from the + // front for little endian. + ShuffleMask = ShuffleMaskStorage; + if (IsBigEndian) + ShuffleMask = ShuffleMask.take_back(DestElts); + else + ShuffleMask = ShuffleMask.take_front(DestElts); } else { - // If we're increasing the number of elements, shuffle in all of the - // elements from InVal and fill the rest of the result elements with zeros - // from a constant zero. + // If we're increasing the number of elements (rewriting an integer zext), + // shuffle in all of the elements from InVal. Fill the rest of the result + // elements with zeros from a constant zero. V2 = Constant::getNullValue(SrcTy); - unsigned SrcElts = SrcTy->getNumElements(); - for (unsigned i = 0, e = SrcElts; i != e; ++i) - ShuffleMask.push_back(i); - - // The excess elements reference the first element of the zero input. - for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i) - ShuffleMask.push_back(SrcElts); + // Use first elt from V2 when indicating zero in the shuffle mask. + uint32_t NullElt = SrcElts; + // Extend with null values in the "most significant bits" by adding elements + // in front of the src vector for big endian, and at the back for little + // endian. + unsigned DeltaElts = DestElts - SrcElts; + if (IsBigEndian) + ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt); + else + ShuffleMaskStorage.append(DeltaElts, NullElt); + ShuffleMask = ShuffleMaskStorage; } return new ShuffleVectorInst(InVal, V2, @@ -2375,8 +2410,8 @@ CastInst *SrcCast = cast(Src); if (BitCastInst *BCIn = dyn_cast(SrcCast->getOperand(0))) if (isa(BCIn->getOperand(0)->getType())) - if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0), - cast(DestTy), *this)) + if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts( + BCIn->getOperand(0), cast(DestTy), *this)) return I; } diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -823,9 +823,13 @@ } define <3 x i32> @test60(<4 x i32> %call4) { -; ALL-LABEL: @test60( -; ALL-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> -; ALL-NEXT: ret <3 x i32> [[P10]] +; BE-LABEL: @test60( +; BE-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; BE-NEXT: ret <3 x i32> [[P10]] +; +; LE-LABEL: @test60( +; LE-NEXT: [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> +; LE-NEXT: ret <3 x i32> [[P10]] ; %p11 = bitcast <4 x i32> %call4 to i128 %p9 = trunc i128 %p11 to i96 @@ -835,9 +839,13 @@ } define <4 x i32> @test61(<3 x i32> %call4) { -; ALL-LABEL: @test61( -; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> -; ALL-NEXT: ret <4 x i32> [[P10]] +; BE-LABEL: @test61( +; BE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; BE-NEXT: ret <4 x i32> [[P10]] +; +; LE-LABEL: @test61( +; LE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> , <4 x i32> +; LE-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x i32> %call4 to i96 %p9 = zext i96 %p11 to i128 @@ -846,10 +854,15 @@ } define <4 x i32> @test62(<3 x float> %call4) { -; ALL-LABEL: @test62( -; ALL-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> -; ALL-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> -; ALL-NEXT: ret <4 x i32> [[P10]] +; BE-LABEL: @test62( +; BE-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; BE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; BE-NEXT: ret <4 x i32> [[P10]] +; +; LE-LABEL: @test62( +; LE-NEXT: [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32> +; LE-NEXT: [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> , <4 x i32> +; LE-NEXT: ret <4 x i32> [[P10]] ; %p11 = bitcast <3 x float> %call4 to i96 %p9 = zext i96 %p11 to i128