Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1916,6 +1916,36 @@
   unsigned LHSWidth = LHS->getType()->getVectorNumElements();
   SmallVector<int, 16> Mask = SVI.getShuffleMask();
   Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
+
+  // Peek through a bitcasted shuffle operand by scaling the mask. If the
+  // simulated shuffle can simplify, then this shuffle is unnecessary:
+  // shuf (bitcast X), undef, Mask --> bitcast X'
+  // TODO: This could be extended to allow length-changing shuffles and/or casts
+  //       to narrower elements. The transform might also be obsoleted if we
+  //       allowed canonicalization of bitcasted shuffles.
+  Value *X;
+  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
+      X->getType()->isVectorTy() && VWidth == LHSWidth &&
+      X->getType()->getVectorNumElements() >= VWidth) {
+    // Create the scaled mask constant.
+    Type *XType = X->getType();
+    unsigned XNumElts = XType->getVectorNumElements();
+    assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
+    unsigned ScaleFactor = XNumElts / VWidth;
+    SmallVector<int, 16> ScaledMask;
+    scaleShuffleMask(ScaleFactor, makeArrayRef(Mask), ScaledMask);
+    SmallVector<Constant *, 16> ScaledMaskC;
+    for (unsigned i = 0; i != XNumElts; ++i)
+      ScaledMaskC.push_back(Builder.getInt32(ScaledMask[i]));
+
+    // If the shuffled source vector simplifies, cast that value to this
+    // shuffle's type.
+    if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
+                                            ConstantVector::get(ScaledMaskC),
+                                            XType, SQ.getWithInstruction(&SVI)))
+      return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
+  }
+
   if (LHS == RHS) {
     assert(!isa<UndefValue>(RHS) && "Shuffle with 2 undef ops not simplified?");
     // Remap any references to RHS to use LHS.
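Note on the mask-scaling step above: scaleShuffleMask(ScaleFactor, ...) is expected to expand each index of the VWidth-element mask into ScaleFactor consecutive indices into X's narrower elements, so a splat over the bitcast type stays a splat over X. The standalone sketch below shows only that index arithmetic under that assumption; the scaleMask helper is hypothetical and is not the in-tree function.

// Illustrative sketch only: mimics the index arithmetic this patch relies on
// for scaleShuffleMask (ScaleFactor = XNumElts / VWidth). Not LLVM code.
#include <cstdio>
#include <vector>

// Expand a mask over VWidth wide elements into a mask over VWidth * Scale
// narrow elements. A negative index means "undef" and the whole expanded
// group stays undef.
static std::vector<int> scaleMask(unsigned Scale, const std::vector<int> &Mask) {
  std::vector<int> Scaled;
  Scaled.reserve(Mask.size() * Scale);
  for (int M : Mask)
    for (unsigned K = 0; K != Scale; ++K)
      Scaled.push_back(M < 0 ? -1 : M * (int)Scale + (int)K);
  return Scaled;
}

int main() {
  // Simulating "shuf (bitcast <8 x i8> X to <4 x i16>), undef, <0,0,0,0>"
  // directly on X: the 4-element splat mask becomes an 8-element mask.
  std::vector<int> Mask = {0, 0, 0, 0};
  for (int M : scaleMask(2, Mask))
    std::printf("%d ", M); // prints: 0 1 0 1 0 1 0 1
  std::printf("\n");
  return 0;
}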
Index: llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
===================================================================
--- llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
+++ llvm/test/Transforms/InstCombine/shufflevec-bitcast.ll
@@ -21,11 +21,12 @@
   ret void
 }
 
+; Shuffle-of-bitcast-splat --> splat-bitcast
+
 define <4 x i16> @splat_bitcast_operand(<8 x i8> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32>
-; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32>
+; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
 ; CHECK-NEXT: ret <4 x i16> [[S2]]
 ;
   %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32>
@@ -34,12 +35,14 @@
   ret <4 x i16> %s2
 }
 
+; Shuffle-of-bitcast-splat --> splat-bitcast
+
 define <4 x i16> @splat_bitcast_operand_uses(<8 x i8> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_uses(
 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32>
 ; CHECK-NEXT: [[BC:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
 ; CHECK-NEXT: call void @use(<4 x i16> [[BC]])
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i16> [[BC]], <4 x i16> undef, <4 x i32>
+; CHECK-NEXT: [[S2:%.*]] = bitcast <8 x i8> [[S1]] to <4 x i16>
 ; CHECK-NEXT: ret <4 x i16> [[S2]]
 ;
   %s1 = shufflevector <8 x i8> %x, <8 x i8> undef, <8 x i32>
@@ -49,11 +52,12 @@
   ret <4 x i16> %s2
 }
 
+; Shuffle-of-bitcast-splat --> splat-bitcast
+
 define <4 x i32> @splat_bitcast_operand_same_size_src_elt(<4 x float> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_same_size_src_elt(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32>
-; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32>
+; CHECK-NEXT: [[S2:%.*]] = bitcast <4 x float> [[S1]] to <4 x i32>
 ; CHECK-NEXT: ret <4 x i32> [[S2]]
 ;
   %s1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32>
@@ -66,9 +70,7 @@
 
 define <4 x i32> @shuf_bitcast_operand(<16 x i8> %x) {
 ; CHECK-LABEL: @shuf_bitcast_operand(
-; CHECK-NEXT: [[S1:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> undef, <16 x i32>
-; CHECK-NEXT: [[BC:%.*]] = bitcast <16 x i8> [[S1]] to <4 x i32>
-; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT: [[S2:%.*]] = bitcast <16 x i8> [[X:%.*]] to <4 x i32>
 ; CHECK-NEXT: ret <4 x i32> [[S2]]
 ;
   %s1 = shufflevector <16 x i8> %x, <16 x i8> undef, <16 x i32>
@@ -77,6 +79,8 @@
   ret <4 x i32> %s2
 }
 
+; TODO: Could allow fold for length-changing shuffles.
+
 define <5 x i16> @splat_bitcast_operand_change_type(<8 x i8> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_change_type(
 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> undef, <8 x i32>
@@ -90,6 +94,8 @@
   ret <5 x i16> %s2
 }
 
+; TODO: Could allow fold for cast to narrow element.
+
 define <4 x i16> @splat_bitcast_operand_wider_src_elt(<2 x i32> %x) {
 ; CHECK-LABEL: @splat_bitcast_operand_wider_src_elt(
 ; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> undef, <2 x i32>
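On the two TODO tests: when the bitcast source has fewer, wider elements than the shuffle (for example <2 x i32> bitcast to <4 x i16>), the mask would have to be widened rather than scaled up, and that is only possible when each group of adjacent narrow indices selects one whole, aligned wide element. The sketch below illustrates that feasibility check under that assumption; tryWidenMask is a hypothetical helper and is not part of this patch.

// Illustrative sketch only: when can a narrow-element shuffle mask be
// rewritten as a mask over wider source elements (the "cast to narrow
// element" TODO case)? Not LLVM code.
#include <cstdio>
#include <vector>

// Try to turn a mask over (NumWide * Scale) narrow elements into a mask over
// NumWide wide elements. Each consecutive group of Scale indices must be
// entirely undef (negative) or select one whole, aligned wide element.
static bool tryWidenMask(unsigned Scale, const std::vector<int> &Mask,
                         std::vector<int> &Widened) {
  Widened.clear();
  for (size_t I = 0; I < Mask.size(); I += Scale) {
    bool AllUndef = true;
    for (unsigned K = 0; K != Scale; ++K)
      AllUndef = AllUndef && Mask[I + K] < 0;
    if (AllUndef) {
      Widened.push_back(-1);
      continue;
    }
    // The group must be {W*Scale, W*Scale+1, ...} for some wide index W.
    int First = Mask[I];
    if (First < 0 || First % (int)Scale != 0)
      return false;
    for (unsigned K = 0; K != Scale; ++K)
      if (Mask[I + K] != First + (int)K)
        return false;
    Widened.push_back(First / (int)Scale);
  }
  return true;
}

int main() {
  std::vector<int> Widened;
  // Swapping the two i32 halves: {2,3,0,1} on <4 x i16> widens to {1,0}.
  std::vector<int> Swap = {2, 3, 0, 1};
  std::printf("swap:  %d\n", tryWidenMask(2, Swap, Widened));  // swap:  1
  // A splat of one narrow element, {1,1,1,1}, has no wide equivalent.
  std::vector<int> Splat = {1, 1, 1, 1};
  std::printf("splat: %d\n", tryWidenMask(2, Splat, Widened)); // splat: 0
  return 0;
}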