diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1889,9 +1889,9 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - if (auto *V = - SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), - SVI.getType(), SQ.getWithInstruction(&SVI))) + SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI); + if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(), + SVI.getType(), ShufQuery)) return replaceInstUsesWith(SVI, V); // shuffle x, x, mask --> shuffle x, undef, mask' @@ -1899,6 +1899,32 @@ unsigned LHSWidth = LHS->getType()->getVectorNumElements(); ArrayRef Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + + // Peek through a bitcasted shuffle operand by scaling the mask. If the + // simulated shuffle can simplify, then this shuffle is unnecessary: + // shuf (bitcast X), undef, Mask --> bitcast X' + // TODO: This could be extended to allow length-changing shuffles and/or casts + // to narrower elements. The transform might also be obsoleted if we + // allowed canonicalization of bitcasted shuffles. + Value *X; + if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) && + X->getType()->isVectorTy() && VWidth == LHSWidth && + X->getType()->getVectorNumElements() >= VWidth) { + // Create the scaled mask constant. + Type *XType = X->getType(); + unsigned XNumElts = XType->getVectorNumElements(); + assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast"); + unsigned ScaleFactor = XNumElts / VWidth; + SmallVector ScaledMask; + scaleShuffleMask(ScaleFactor, Mask, ScaledMask); + + // If the shuffled source vector simplifies, cast that value to this + // shuffle's type. + if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType), + ScaledMask, XType, ShufQuery)) + return BitCastInst::Create(Instruction::BitCast, V, SVI.getType()); + } + if (LHS == RHS) { assert(!isa(RHS) && "Shuffle with 2 undef ops not simplified?"); // Remap any references to RHS to use LHS. diff --git a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll --- a/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll +++ b/llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll @@ -21,11 +21,12 @@ ret void } +; Shuffle-of-bitcast-splat --> splat-bitcast + define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) { ; CHECK-LABEL: @splat_bitcast_operand( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> -; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[S2]] ; %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32> @@ -34,12 +35,14 @@ ret <4 x i16> %s2 } +; Shuffle-of-bitcast-splat --> splat-bitcast + define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) { ; CHECK-LABEL: @splat_bitcast_operand_uses( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> ; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> ; CHECK-NEXT: call void @use(<4 x i16> [[BC]]) -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16> ; CHECK-NEXT: ret <4 x i16> [[S2]] ; %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32> @@ -49,11 +52,12 @@ ret <4 x i16> %s2 } +; Shuffle-of-bitcast-splat --> splat-bitcast + define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) { ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> -; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S2]] ; %s1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> @@ -66,9 +70,7 @@ define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) { ; CHECK-LABEL: @shuf_bitcast_operand( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32> -; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x i8> [[S1]] to <4 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: [[S2:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[S2]] ; %s1 = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32> @@ -77,6 +79,8 @@ ret <4 x i32> %s2 } +; TODO: Could allow fold for length-changing shuffles. + define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) { ; CHECK-LABEL: @splat_bitcast_operand_change_type( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32> @@ -90,6 +94,8 @@ ret <5 x i16> %s2 } +; TODO: Could allow fold for cast to narrow element. + define <4 x i16> @splat_bitcast_operand_wider_src_elt(<2 x i32> %x) { ; CHECK-LABEL: @splat_bitcast_operand_wider_src_elt( ; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32>